{"id":"https://openalex.org/W2997351497","doi":"https://doi.org/10.1145/3231737","title":"Convolutional Attention Networks for Scene Text Recognition","display_name":"Convolutional Attention Networks for Scene Text Recognition","publication_year":2019,"publication_date":"2019-01-24","ids":{"openalex":"https://openalex.org/W2997351497","doi":"https://doi.org/10.1145/3231737","mag":"2997351497"},"language":"en","primary_location":{"id":"doi:10.1145/3231737","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3231737","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078162380","display_name":"Hongtao Xie","orcid":"https://orcid.org/0000-0002-6249-5315"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hongtao Xie","raw_affiliation_strings":["School of Information Science and Technology, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"School of Information Science and Technology, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008604905","display_name":"Shancheng Fang","orcid":"https://orcid.org/0000-0002-3100-3664"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I4210156404","display_name":"Institute of Information Engineering","ror":"https://ror.org/04r53se39","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210156404"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shancheng Fang","raw_affiliation_strings":["Institute of Information Engineering, Chinese Academy of Sciences, China and School of Cyber Security, University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Information Engineering, Chinese Academy of Sciences, China and School of Cyber Security, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210156404","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003217535","display_name":"Zheng-Jun Zha","orcid":"https://orcid.org/0000-0003-2510-8993"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zheng-Jun Zha","raw_affiliation_strings":["School of Information Science and Technology, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"School of Information Science and Technology, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034313244","display_name":"Yating Yang","orcid":"https://orcid.org/0000-0002-2639-3944"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210106108","display_name":"Xinjiang Technical Institute of Physics & Chemistry","ror":"https://ror.org/00x44h034","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210106108"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yating Yang","raw_affiliation_strings":["Xinjiang Technical Institute of Physics and Chemistry, Chinese Academy of Sciences, Urumchi, China"],"affiliations":[{"raw_affiliation_string":"Xinjiang Technical Institute of Physics and Chemistry, Chinese Academy of Sciences, Urumchi, China","institution_ids":["https://openalex.org/I4210106108","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100443879","display_name":"Yan Li","orcid":"https://orcid.org/0000-0002-4694-4926"},"institutions":[{"id":"https://openalex.org/I4401726859","display_name":"Kuaishou (China)","ror":"https://ror.org/0258as409","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726859"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yan Li","raw_affiliation_strings":["Beijing Kuaishou Technology Co., Ltd. Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing Kuaishou Technology Co., Ltd. Beijing, China","institution_ids":["https://openalex.org/I4401726859"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046305086","display_name":"Yongdong Zhang","orcid":"https://orcid.org/0000-0002-1151-1792"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongdong Zhang","raw_affiliation_strings":["School of Information Science and Technology, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"School of Information Science and Technology, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5078162380"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":6.782,"has_fulltext":false,"cited_by_count":85,"citation_normalized_percentile":{"value":0.97509784,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"15","issue":"1s","first_page":"1","last_page":"17"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.9915000200271606,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12707","display_name":"Vehicle License Plate Recognition","score":0.9861000180244446,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8398549556732178},{"id":"https://openalex.org/keywords/pooling","display_name":"Pooling","score":0.8049592971801758},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.7813589572906494},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6986116170883179},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.6460110545158386},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.6087425947189331},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.47508588433265686},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4706125855445862},{"id":"https://openalex.org/keywords/convolutional-code","display_name":"Convolutional code","score":0.45012640953063965},{"id":"https://openalex.org/keywords/layer","display_name":"Layer (electronics)","score":0.4375542998313904},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.436921626329422},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.40574145317077637},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.20140445232391357},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.13448497653007507}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8398549556732178},{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.8049592971801758},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.7813589572906494},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6986116170883179},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.6460110545158386},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.6087425947189331},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.47508588433265686},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4706125855445862},{"id":"https://openalex.org/C157899210","wikidata":"https://www.wikidata.org/wiki/Q1395022","display_name":"Convolutional code","level":3,"score":0.45012640953063965},{"id":"https://openalex.org/C2779227376","wikidata":"https://www.wikidata.org/wiki/Q6505497","display_name":"Layer (electronics)","level":2,"score":0.4375542998313904},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.436921626329422},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.40574145317077637},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.20140445232391357},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.13448497653007507},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3231737","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3231737","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6600000262260437,"display_name":"Sustainable cities and communities","id":"https://metadata.un.org/sdg/11"}],"awards":[{"id":"https://openalex.org/G3379710740","display_name":null,"funder_award_id":"61525206,61771468,61622211,61472392 and 61620106009","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4530701113","display_name":null,"funder_award_id":"WK2100100030","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G7532226545","display_name":null,"funder_award_id":"2017209","funder_id":"https://openalex.org/F4320322847","funder_display_name":"Youth Innovation Promotion Association of the Chinese Academy of Sciences"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322847","display_name":"Youth Innovation Promotion Association of the Chinese Academy of Sciences","ror":"https://ror.org/031141b54"},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W70975097","https://openalex.org/W1677182931","https://openalex.org/W1895191496","https://openalex.org/W1902237438","https://openalex.org/W1972603974","https://openalex.org/W1978729128","https://openalex.org/W1983558466","https://openalex.org/W1990550880","https://openalex.org/W1998042868","https://openalex.org/W2008806374","https://openalex.org/W2053317383","https://openalex.org/W2064675550","https://openalex.org/W2067953788","https://openalex.org/W2071027807","https://openalex.org/W2122221966","https://openalex.org/W2127141656","https://openalex.org/W2147069236","https://openalex.org/W2194775991","https://openalex.org/W2239589426","https://openalex.org/W2294053032","https://openalex.org/W2302255633","https://openalex.org/W2515116636","https://openalex.org/W2529099292","https://openalex.org/W2558687840","https://openalex.org/W2560674852","https://openalex.org/W2560835477","https://openalex.org/W2594435365","https://openalex.org/W2602882433","https://openalex.org/W2760998155","https://openalex.org/W2762383441","https://openalex.org/W2887808321","https://openalex.org/W2963517393","https://openalex.org/W2964199361","https://openalex.org/W6600195168"],"related_works":["https://openalex.org/W2953234277","https://openalex.org/W2626256601","https://openalex.org/W2900413183","https://openalex.org/W4390975304","https://openalex.org/W147410782","https://openalex.org/W3022252430","https://openalex.org/W4287804464","https://openalex.org/W2132373020","https://openalex.org/W2096049278","https://openalex.org/W2221419418"],"abstract_inverted_index":{"In":[0],"this":[1],"article,":[2],"we":[3,106],"present":[4],"Convoluitional":[5],"Attention":[6],"Networks":[7,27,32],"(CAN)":[8],"for":[9,17,145],"unconstrained":[10],"scene":[11,18,146],"text":[12,19,147],"recognition.":[13],"Recent":[14],"dominant":[15],"approaches":[16],"recognition":[20],"are":[21,120],"mainly":[22],"based":[23],"on":[24,57,142],"Convolutional":[25],"Neural":[26,31],"(CNN)":[28],"and":[29,39,59,85,105,116,127,155,167],"Recurrent":[30],"(RNN),":[33],"where":[34],"the":[35,40,78,86,93,103,162,183],"CNN":[36,58,84],"encodes":[37],"images":[38],"RNN":[41],"generates":[42],"character":[43],"sequences.":[44],"Our":[45],"CAN":[46,53,72],"is":[47,54,80,88,96],"different":[48,165],"from":[49],"these":[50],"methods;":[51],"our":[52,68,133,170],"completely":[55],"built":[56],"includes":[60],"an":[61],"attention":[62,94,111],"mechanism.":[63],"The":[64,158],"distinctive":[65],"characteristics":[66],"of":[67,102,137,164,185],"method":[69,112,172],"include":[70],"(i)":[71],"follows":[73],"encoder-decoder":[74],"architecture,":[75],"in":[76,98,122],"which":[77],"encoder":[79,126],"a":[81,89,108,124,128,135],"deep":[82],"two-dimensional":[83],"decoder":[87,130],"one-dimensional":[90],"CNN;":[91],"(ii)":[92],"mechanism":[95],"applied":[97],"every":[99],"convolutional":[100],"layer":[101],"decoder,":[104],"propose":[107],"novel":[109],"spatial":[110,125],"using":[113],"average":[114],"pooling;":[115],"(iii)":[117],"position":[118],"embeddings":[119],"equipped":[121],"both":[123],"sequence":[129],"to":[131],"give":[132],"networks":[134],"sense":[136],"location.":[138],"We":[139],"conduct":[140],"experiments":[141],"standard":[143],"datasets":[144],"recognition,":[148],"including":[149],"Street":[150],"View":[151],"Text":[152],",":[153],"IIIT5K,":[154],"ICDAR":[156],"datasets.":[157],"experimental":[159],"results":[160],"validate":[161],"effectiveness":[163],"components":[166],"show":[168],"that":[169],"convolutional-based":[171],"achieves":[173],"state-of-the-art":[174],"or":[175],"competitive":[176],"performance":[177],"over":[178],"prior":[179],"works,":[180],"even":[181],"without":[182],"use":[184],"RNN.":[186]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":11},{"year":2021,"cited_by_count":21},{"year":2020,"cited_by_count":19},{"year":2019,"cited_by_count":16},{"year":2018,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
