{"id":"https://openalex.org/W2896034938","doi":"https://doi.org/10.1145/3240508.3240571","title":"Attention and Language Ensemble for Scene Text Recognition with Convolutional Sequence Modeling","display_name":"Attention and Language Ensemble for Scene Text Recognition with Convolutional Sequence Modeling","publication_year":2018,"publication_date":"2018-10-15","ids":{"openalex":"https://openalex.org/W2896034938","doi":"https://doi.org/10.1145/3240508.3240571","mag":"2896034938"},"language":"en","primary_location":{"id":"doi:10.1145/3240508.3240571","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3240508.3240571","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 26th ACM international conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5008604905","display_name":"Shancheng Fang","orcid":"https://orcid.org/0000-0002-3100-3664"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shancheng Fang","raw_affiliation_strings":["Chinese Academy of Sciences &amp; University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences &amp; University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078162380","display_name":"Hongtao Xie","orcid":"https://orcid.org/0000-0002-6249-5315"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongtao Xie","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003217535","display_name":"Zheng-Jun Zha","orcid":"https://orcid.org/0000-0003-2510-8993"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zheng-Jun Zha","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043182744","display_name":"Nannan Sun","orcid":"https://orcid.org/0000-0002-8007-774X"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Nannan Sun","raw_affiliation_strings":["Chinese Academy of Sciences &amp; University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences &amp; University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100533815","display_name":"Jianlong Tan","orcid":"https://orcid.org/0009-0001-4558-560X"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianlong Tan","raw_affiliation_strings":["Chinese Academy of Sciences &amp; University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences &amp; University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046305086","display_name":"Yongdong Zhang","orcid":"https://orcid.org/0000-0002-1151-1792"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongdong Zhang","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5008604905"],"corresponding_institution_ids":["https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":5.1184,"has_fulltext":false,"cited_by_count":68,"citation_normalized_percentile":{"value":0.9674727,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"248","last_page":"256"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9936000108718872,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9922999739646912,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8673321008682251},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.755403995513916},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.7229132652282715},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6404968500137329},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.6252926588058472},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.614547610282898},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.6125007271766663},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.46766555309295654},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4419093728065491},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.41152283549308777},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3700653314590454},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.2223900854587555}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8673321008682251},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.755403995513916},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.7229132652282715},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6404968500137329},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.6252926588058472},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.614547610282898},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.6125007271766663},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.46766555309295654},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4419093728065491},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.41152283549308777},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3700653314590454},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.2223900854587555},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3240508.3240571","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3240508.3240571","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 26th ACM international conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.7699999809265137,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W70975097","https://openalex.org/W104184427","https://openalex.org/W1491389626","https://openalex.org/W1665214252","https://openalex.org/W1677182931","https://openalex.org/W1836465849","https://openalex.org/W1895191496","https://openalex.org/W1902237438","https://openalex.org/W1922126009","https://openalex.org/W1947481528","https://openalex.org/W1976080191","https://openalex.org/W1978729128","https://openalex.org/W1981283549","https://openalex.org/W1998042868","https://openalex.org/W2008806374","https://openalex.org/W2053317383","https://openalex.org/W2061802763","https://openalex.org/W2064675550","https://openalex.org/W2071027807","https://openalex.org/W2091526670","https://openalex.org/W2107878631","https://openalex.org/W2127141656","https://openalex.org/W2136994027","https://openalex.org/W2140132917","https://openalex.org/W2144554289","https://openalex.org/W2153182373","https://openalex.org/W2194187530","https://openalex.org/W2194775991","https://openalex.org/W2284050935","https://openalex.org/W2294053032","https://openalex.org/W2302255633","https://openalex.org/W2343052201","https://openalex.org/W2594435365","https://openalex.org/W2613904329","https://openalex.org/W2750938222","https://openalex.org/W2783960289","https://openalex.org/W2963360699","https://openalex.org/W2963403868","https://openalex.org/W2963517393","https://openalex.org/W2963718330","https://openalex.org/W2963970792","https://openalex.org/W2964199361","https://openalex.org/W2964308564","https://openalex.org/W2964312704","https://openalex.org/W4301445638"],"related_works":["https://openalex.org/W2378211422","https://openalex.org/W2745001401","https://openalex.org/W4321353415","https://openalex.org/W2130974462","https://openalex.org/W972276598","https://openalex.org/W4246352526","https://openalex.org/W2028665553","https://openalex.org/W2160451571","https://openalex.org/W2495256954","https://openalex.org/W2259317772"],"abstract_inverted_index":{"Recent":[0],"dominant":[1],"approaches":[2],"for":[3,131,145],"scene":[4,146],"text":[5,147],"recognition":[6],"are":[7,92,129],"mainly":[8],"based":[9,43],"on":[10,44,142,168],"convolutional":[11],"neural":[12,17],"network":[13,18],"(CNN)":[14],"and":[15,25,68,84,101,127,154],"recurrent":[16],"(RNN),":[19],"where":[20],"the":[21,26,54,61,69,96,99,133,174],"CNN":[22,67],"processes":[23],"images":[24],"RNN":[27],"generates":[28],"character":[29],"sequences.":[30],"Different":[31],"from":[32,120,125],"these":[33],"methods,":[34],"we":[35],"propose":[36],"an":[37,107,136],"attention-based":[38],"architecture1":[39],"which":[40,60],"is":[41,63,71],"completely":[42],"CNNs.":[45],"The":[46,157],"distinctive":[47],"characteristics":[48],"of":[49,115,176],"our":[50,161],"method":[51,55,163],"include:":[52],"(1)":[53],"follows":[56],"encoder-decoder":[57],"architecture,":[58],"in":[59,95,135],"encoder":[62],"a":[64,72,85,117],"two-dimensional":[65],"residual":[66],"decoder":[70],"deep":[73],"one-dimensional":[74],"CNN.":[75],"(2)":[76],"An":[77],"attention":[78,100,126],"module":[79,87],"that":[80,88],"captures":[81],"visual":[82],"cues,":[83],"language":[86,102,121,128],"models":[89],"linguistic":[90],"rules":[91],"designed":[93],"equally":[94],"decoder.":[97],"Therefore":[98],"can":[103],"be":[104],"viewed":[105],"as":[106],"ensemble":[108],"to":[109],"boost":[110],"predictions":[111],"jointly.":[112],"(3)":[113],"Instead":[114],"using":[116],"single":[118],"loss":[119],"aspect,":[122],"multiple":[123],"losses":[124],"accumulated":[130],"training":[132],"networks":[134],"end-to-end":[137],"way.":[138],"We":[139],"conduct":[140],"experiments":[141],"standard":[143],"datasets":[144],"recognition,":[148],"including":[149],"Street":[150],"View":[151],"Text,":[152],"IIIT5K":[153],"ICDAR":[155],"datasets.":[156],"experimental":[158],"results":[159],"show":[160],"CNN-based":[162],"has":[164],"achieved":[165],"state-of-the-art":[166],"performance":[167],"several":[169],"benchmark":[170],"datasets,":[171],"even":[172],"without":[173],"use":[175],"RNN.":[177]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":8},{"year":2021,"cited_by_count":20},{"year":2020,"cited_by_count":11},{"year":2019,"cited_by_count":18}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
