{"id":"https://openalex.org/W3198359920","doi":"https://doi.org/10.1145/3460426.3463639","title":"Scene Text Recognition with Cascade Attention Network","display_name":"Scene Text Recognition with Cascade Attention Network","publication_year":2021,"publication_date":"2021-08-24","ids":{"openalex":"https://openalex.org/W3198359920","doi":"https://doi.org/10.1145/3460426.3463639","mag":"3198359920"},"language":"en","primary_location":{"id":"doi:10.1145/3460426.3463639","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3460426.3463639","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2021 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100744463","display_name":"Min Zhang","orcid":"https://orcid.org/0000-0002-8681-5889"},"institutions":[{"id":"https://openalex.org/I4210128628","display_name":"Peking University Shenzhen Hospital","ror":"https://ror.org/03kkjyb15","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I4210128628"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Min Zhang","raw_affiliation_strings":["Peking University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Peking University, Shenzhen, China","institution_ids":["https://openalex.org/I4210128628"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100762316","display_name":"Meng Ma","orcid":"https://orcid.org/0000-0002-1963-2513"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Meng Ma","raw_affiliation_strings":["Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100338593","display_name":"Ping Wang","orcid":"https://orcid.org/0000-0001-5759-8283"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ping Wang","raw_affiliation_strings":["Ministry of Education, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Ministry of Education, Beijing, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100744463"],"corresponding_institution_ids":["https://openalex.org/I4210128628"],"apc_list":null,"apc_paid":null,"fwci":0.3843,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.60882353,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"385","last_page":"393"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.988099992275238,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8001700639724731},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5774223208427429},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5563470721244812},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.548534631729126},{"id":"https://openalex.org/keywords/attention-network","display_name":"Attention network","score":0.5459025502204895},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.5331050157546997},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5244731903076172},{"id":"https://openalex.org/keywords/dimension","display_name":"Dimension (graph theory)","score":0.5172749161720276},{"id":"https://openalex.org/keywords/fuse","display_name":"Fuse (electrical)","score":0.479265421628952},{"id":"https://openalex.org/keywords/channel","display_name":"Channel (broadcasting)","score":0.43103355169296265},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.4273815453052521},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3922775089740753},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3261232376098633},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.12987780570983887}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8001700639724731},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5774223208427429},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5563470721244812},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.548534631729126},{"id":"https://openalex.org/C2993807640","wikidata":"https://www.wikidata.org/wiki/Q103709453","display_name":"Attention network","level":2,"score":0.5459025502204895},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.5331050157546997},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5244731903076172},{"id":"https://openalex.org/C33676613","wikidata":"https://www.wikidata.org/wiki/Q13415176","display_name":"Dimension (graph theory)","level":2,"score":0.5172749161720276},{"id":"https://openalex.org/C141353440","wikidata":"https://www.wikidata.org/wiki/Q182221","display_name":"Fuse (electrical)","level":2,"score":0.479265421628952},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.43103355169296265},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.4273815453052521},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3922775089740753},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3261232376098633},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.12987780570983887},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3460426.3463639","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3460426.3463639","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2021 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.5699999928474426,"id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1491389626","https://openalex.org/W1521064364","https://openalex.org/W1569614731","https://openalex.org/W1971822075","https://openalex.org/W1981283549","https://openalex.org/W1998042868","https://openalex.org/W2049951199","https://openalex.org/W2135431879","https://openalex.org/W2146835493","https://openalex.org/W2194187530","https://openalex.org/W2343052201","https://openalex.org/W2750938222","https://openalex.org/W2752225195","https://openalex.org/W2788069964","https://openalex.org/W2810983211","https://openalex.org/W2875814315","https://openalex.org/W2896034938","https://openalex.org/W2911295582","https://openalex.org/W2963233387","https://openalex.org/W2963403868","https://openalex.org/W2963517393","https://openalex.org/W2980487166","https://openalex.org/W3004846386","https://openalex.org/W3035449864","https://openalex.org/W3106271744","https://openalex.org/W4234552385"],"related_works":["https://openalex.org/W2354322770","https://openalex.org/W3000097931","https://openalex.org/W1570848052","https://openalex.org/W2373192430","https://openalex.org/W4239268388","https://openalex.org/W1924837940","https://openalex.org/W2379407973","https://openalex.org/W4243305035","https://openalex.org/W2044769131","https://openalex.org/W2359293891"],"abstract_inverted_index":{"Scene":[0],"text":[1],"recognition":[2],"(STR)":[3],"has":[4],"experienced":[5],"increasing":[6],"popularity":[7],"both":[8,123],"in":[9,12,38,173],"academia":[10],"and":[11,77,127,131,140,150],"industry.":[13],"Regarding":[14],"STR":[15],"as":[16,53],"a":[17,89,102,115],"sequence":[18],"prediction":[19],"task,":[20],"most":[21],"state-of-the-art":[22],"(SOTA)":[23],"approaches":[24],"employ":[25],"the":[26,40,46,54,80,84,112,144,155,168],"attention-based":[27],"encoder-decoder":[28],"architecture":[29],"to":[30,70,108,119,137,147,154,159],"recognize":[31],"texts.":[32],"However,":[33],"these":[34],"methods":[35],"still":[36],"struggle":[37],"localizing":[39],"precise":[41],"alignment":[42],"center":[43],"associated":[44],"with":[45],"current":[47],"character,":[48],"which":[49],"is":[50,61,95],"also":[51],"named":[52],"attention":[55,91,100,104,117,135],"drift":[56],"phenomenon.":[57],"One":[58],"major":[59],"reason":[60],"that":[62,106],"directly":[63],"converting":[64],"low-quality":[65],"or":[66],"distorted":[67],"word":[68],"images":[69],"sequential":[71,109],"features":[72,110],"may":[73],"introduce":[74],"confusing":[75],"information":[76,126],"thus":[78],"mislead":[79],"network.":[81,92],"To":[82],"address":[83],"problem,":[85],"this":[86],"paper":[87],"proposes":[88],"cascade":[90],"The":[93],"model":[94],"composed":[96],"of":[97,122,170],"three":[98],"novel":[99],"modules:":[101],"vanilla":[103],"module":[105,118,136],"attends":[107],"from":[111],"horizontal":[113],"direction,":[114],"cross-network":[116],"take":[120],"advantage":[121],"one-dimension":[124],"contextual":[125],"two-dimension":[128],"visual":[129],"distributions,":[130],"an":[132],"aspects":[133],"fusion":[134],"fuse":[138],"spatial":[139],"channel-wise":[141],"information.":[142],"Accordingly,":[143],"network":[145],"manages":[146],"yield":[148],"distinguished":[149],"refined":[151],"representations":[152],"correlated":[153],"target":[156],"sequence.":[157],"Compared":[158],"SOTA":[160],"methods,":[161],"experimental":[162],"results":[163],"on":[164,177],"seven":[165],"benchmarks":[166],"demonstrate":[167],"superiority":[169],"our":[171],"framework":[172],"recognizing":[174],"scene":[175],"texts":[176],"various":[178],"conditions.":[179]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
