{"id":"https://openalex.org/W4312340530","doi":"https://doi.org/10.1109/icpr56361.2022.9956631","title":"Scene Text Recognition with Self-supervised Contrastive Predictive Coding","display_name":"Scene Text Recognition with Self-supervised Contrastive Predictive Coding","publication_year":2022,"publication_date":"2022-08-21","ids":{"openalex":"https://openalex.org/W4312340530","doi":"https://doi.org/10.1109/icpr56361.2022.9956631"},"language":"en","primary_location":{"id":"doi:10.1109/icpr56361.2022.9956631","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icpr56361.2022.9956631","pdf_url":null,"source":{"id":"https://openalex.org/S4363607731","display_name":"2022 26th International Conference on Pattern Recognition (ICPR)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 26th International Conference on Pattern Recognition (ICPR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5042095719","display_name":"Xinzhe Jiang","orcid":"https://orcid.org/0009-0007-1684-3968"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xinzhe Jiang","raw_affiliation_strings":["University of Science and Technology of China,National Engineering Research Center of Speech and Language Information Processing,Hefei,P. R. China","National Engineering Research Center of Speech and Language Information Processing, University of Science and Technology of China, Hefei, P. R. China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,National Engineering Research Center of Speech and Language Information Processing,Hefei,P. R. China","institution_ids":["https://openalex.org/I126520041"]},{"raw_affiliation_string":"National Engineering Research Center of Speech and Language Information Processing, University of Science and Technology of China, Hefei, P. R. China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101760682","display_name":"Jianshu Zhang","orcid":"https://orcid.org/0000-0002-2713-2535"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jianshu Zhang","raw_affiliation_strings":["iFLYTEK Research"],"affiliations":[{"raw_affiliation_string":"iFLYTEK Research","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066595711","display_name":"Jun Du","orcid":"https://orcid.org/0000-0002-2387-0389"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Du","raw_affiliation_strings":["University of Science and Technology of China,National Engineering Research Center of Speech and Language Information Processing,Hefei,P. R. China","National Engineering Research Center of Speech and Language Information Processing, University of Science and Technology of China, Hefei, P. R. China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,National Engineering Research Center of Speech and Language Information Processing,Hefei,P. R. China","institution_ids":["https://openalex.org/I126520041"]},{"raw_affiliation_string":"National Engineering Research Center of Speech and Language Information Processing, University of Science and Technology of China, Hefei, P. R. China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012060181","display_name":"Zhenrong Zhang","orcid":"https://orcid.org/0000-0003-1125-6637"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenrong Zhang","raw_affiliation_strings":["University of Science and Technology of China,National Engineering Research Center of Speech and Language Information Processing,Hefei,P. R. China","National Engineering Research Center of Speech and Language Information Processing, University of Science and Technology of China, Hefei, P. R. China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,National Engineering Research Center of Speech and Language Information Processing,Hefei,P. R. China","institution_ids":["https://openalex.org/I126520041"]},{"raw_affiliation_string":"National Engineering Research Center of Speech and Language Information Processing, University of Science and Technology of China, Hefei, P. R. China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101605865","display_name":"Jiajia Wu","orcid":"https://orcid.org/0000-0001-7667-4878"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiajia Wu","raw_affiliation_strings":["iFLYTEK Research"],"affiliations":[{"raw_affiliation_string":"iFLYTEK Research","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5042095719"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":0.1199,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.43547697,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1514","last_page":"1521"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9944000244140625,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9915000200271606,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7694132328033447},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7267934679985046},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5689100623130798},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.5351753234863281},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5233163833618164},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.4618602693080902},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.45992952585220337},{"id":"https://openalex.org/keywords/correlation","display_name":"Correlation","score":0.4473697543144226},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4390580952167511},{"id":"https://openalex.org/keywords/neural-coding","display_name":"Neural coding","score":0.4262383282184601},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4252357482910156},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11320436000823975}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7694132328033447},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7267934679985046},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5689100623130798},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5351753234863281},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5233163833618164},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.4618602693080902},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.45992952585220337},{"id":"https://openalex.org/C117220453","wikidata":"https://www.wikidata.org/wiki/Q5172842","display_name":"Correlation","level":2,"score":0.4473697543144226},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4390580952167511},{"id":"https://openalex.org/C77637269","wikidata":"https://www.wikidata.org/wiki/Q7002051","display_name":"Neural coding","level":2,"score":0.4262383282184601},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4252357482910156},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11320436000823975},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icpr56361.2022.9956631","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icpr56361.2022.9956631","pdf_url":null,"source":{"id":"https://openalex.org/S4363607731","display_name":"2022 26th International Conference on Pattern Recognition (ICPR)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 26th International Conference on Pattern Recognition (ICPR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.41999998688697815,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W1491389626","https://openalex.org/W1522301498","https://openalex.org/W1971822075","https://openalex.org/W1981283549","https://openalex.org/W1998042868","https://openalex.org/W2008806374","https://openalex.org/W2012112601","https://openalex.org/W2127141656","https://openalex.org/W2128409098","https://openalex.org/W2144554289","https://openalex.org/W2146835493","https://openalex.org/W2152790380","https://openalex.org/W2194187530","https://openalex.org/W2343052201","https://openalex.org/W2519091744","https://openalex.org/W2785325870","https://openalex.org/W2810983211","https://openalex.org/W2842511635","https://openalex.org/W2887997457","https://openalex.org/W2896034938","https://openalex.org/W2944828972","https://openalex.org/W2962790387","https://openalex.org/W2963587345","https://openalex.org/W2963901427","https://openalex.org/W2973049979","https://openalex.org/W2979382951","https://openalex.org/W2986661129","https://openalex.org/W2988326850","https://openalex.org/W3004846386","https://openalex.org/W3005680577","https://openalex.org/W3022933286","https://openalex.org/W3035524453","https://openalex.org/W3082397598","https://openalex.org/W3092309237","https://openalex.org/W3175855397","https://openalex.org/W3177684257","https://openalex.org/W3199031966","https://openalex.org/W4285508216","https://openalex.org/W4297808394","https://openalex.org/W6629590909","https://openalex.org/W6631190155","https://openalex.org/W6649973027","https://openalex.org/W6682948231","https://openalex.org/W6728765125","https://openalex.org/W6747899497","https://openalex.org/W6754278344","https://openalex.org/W6774314701","https://openalex.org/W6799449957"],"related_works":["https://openalex.org/W4293226380","https://openalex.org/W4390516098","https://openalex.org/W2181948922","https://openalex.org/W2384362569","https://openalex.org/W2142795561","https://openalex.org/W2366163563","https://openalex.org/W2163885456","https://openalex.org/W2065031478","https://openalex.org/W2166121228","https://openalex.org/W2128771053"],"abstract_inverted_index":{"Self-supervised":[0],"visual":[1,73,137],"pre-training":[2,102],"has":[3],"recently":[4],"emerged":[5],"in":[6],"scene":[7,120],"text":[8,61,121],"recognition":[9],"(STR),":[10],"which":[11,58,132],"designs":[12],"the":[13,39,72,77,82,87,115,128,134,152],"pretext":[14],"tasks":[15],"and":[16,70,103],"takes":[17],"unlabeled":[18],"data":[19,156],"as":[20,63],"input":[21],"to":[22,38,68,113],"obtain":[23],"useful":[24],"representations":[25],"for":[26,140],"STR.":[27,141],"However,":[28],"most":[29],"current":[30],"self-supervised":[31,49,130],"methods":[32],"do":[33],"not":[34],"pay":[35],"special":[36],"attention":[37],"importance":[40],"of":[41,136,154],"sequence":[42,65,74,138],"awareness.":[43],"Accordingly,":[44],"we":[45,94],"propose":[46],"a":[47,60,64,96,104],"novel":[48],"STR":[50,116],"method":[51,126],"based":[52],"on":[53,119],"contrastive":[54],"predictive":[55],"coding":[56],"(STR-CPC),":[57],"regards":[59],"instance":[62],"from":[66],"left":[67],"right":[69],"captures":[71],"correlation.":[75],"Considering":[76],"information":[78],"overlap":[79],"problem":[80],"within":[81],"feature":[83],"map":[84],"induced":[85],"by":[86],"deep":[88],"convolutional":[89],"neural":[90],"network":[91],"(CNN)":[92],"encoder,":[93],"design":[95],"widthwise":[97],"causal":[98],"convolution":[99],"during":[100,110],"model":[101,111],"progressive":[105],"recovery":[106],"training":[107,150],"strategy":[108],"(PRTS)":[109],"fine-tuning":[112],"improve":[114],"performance.":[117],"Experiments":[118],"show":[122],"that":[123],"our":[124],"STR-CPC":[125,143],"outperforms":[127],"existing":[129],"methods,":[131],"testifies":[133],"advantage":[135],"correlation":[139],"Additionally,":[142],"observably":[144],"boosts":[145],"performance":[146],"compared":[147],"with":[148],"supervised":[149],"when":[151],"amount":[153],"labeled":[155],"decreases.":[157]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
