{"id":"https://openalex.org/W4392902984","doi":"https://doi.org/10.1109/icassp48485.2024.10446176","title":"Read, Spell and Repeat: Scene Text Recognition with Vision-Language Circular Refinement","display_name":"Read, Spell and Repeat: Scene Text Recognition with Vision-Language Circular Refinement","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392902984","doi":"https://doi.org/10.1109/icassp48485.2024.10446176"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10446176","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp48485.2024.10446176","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030061165","display_name":"Taiwei Zhang","orcid":"https://orcid.org/0000-0003-4440-419X"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]},{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Taiwei Zhang","raw_affiliation_strings":["Beihang University,Hangzhou Innovation Institute,Hangzhou,China","State Key Laboratory of Virtual Reality Technology and Systems, Beihang University, Beijing, China","Hangzhou Innovation Institute, Beihang University, Hangzhou, China","School of Computer Science and Engineering, Beihang University, Beijing, China","Shanghai Artificial Intelligence Laboratory, Shanghai, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beihang University,Hangzhou Innovation Institute,Hangzhou,China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"State Key Laboratory of Virtual Reality Technology and Systems, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"Hangzhou Innovation Institute, Beihang University, Hangzhou, China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"Shanghai Artificial Intelligence Laboratory, Shanghai, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4391012619"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066623095","display_name":"Zhenghui Hu","orcid":"https://orcid.org/0000-0002-5775-0481"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]},{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenghui Hu","raw_affiliation_strings":["Beihang University,Hangzhou Innovation Institute,Hangzhou,China","Shanghai Artificial Intelligence Laboratory, Shanghai, China","Hangzhou Innovation Institute, Beihang University, Hangzhou, China","State Key Laboratory of Virtual Reality Technology and Systems, Beihang University, Beijing, China","School of Computer Science and Engineering, Beihang University, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beihang University,Hangzhou Innovation Institute,Hangzhou,China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"Shanghai Artificial Intelligence Laboratory, Shanghai, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4391012619"]},{"raw_affiliation_string":"Hangzhou Innovation Institute, Beihang University, Hangzhou, China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"State Key Laboratory of Virtual Reality Technology and Systems, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100662467","display_name":"Weixin Li","orcid":"https://orcid.org/0000-0002-5093-5635"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]},{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weixin Li","raw_affiliation_strings":["Beihang University,Hangzhou Innovation Institute,Hangzhou,China","Shanghai Artificial Intelligence Laboratory, Shanghai, China","School of Computer Science and Engineering, Beihang University, Beijing, China","Hangzhou Innovation Institute, Beihang University, Hangzhou, China","State Key Laboratory of Virtual Reality Technology and Systems, Beihang University, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beihang University,Hangzhou Innovation Institute,Hangzhou,China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"Shanghai Artificial Intelligence Laboratory, Shanghai, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4391012619"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"Hangzhou Innovation Institute, Beihang University, Hangzhou, China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"State Key Laboratory of Virtual Reality Technology and Systems, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056847110","display_name":"Qingjie Liu","orcid":"https://orcid.org/0000-0002-5181-6451"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]},{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingjie Liu","raw_affiliation_strings":["Beihang University,Hangzhou Innovation Institute,Hangzhou,China","School of Computer Science and Engineering, Beihang University, Beijing, China","Shanghai Artificial Intelligence Laboratory, Shanghai, China","State Key Laboratory of Virtual Reality Technology and Systems, Beihang University, Beijing, China","Hangzhou Innovation Institute, Beihang University, Hangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beihang University,Hangzhou Innovation Institute,Hangzhou,China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"Shanghai Artificial Intelligence Laboratory, Shanghai, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4391012619"]},{"raw_affiliation_string":"State Key Laboratory of Virtual Reality Technology and Systems, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"Hangzhou Innovation Institute, Beihang University, Hangzhou, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5115589096","display_name":"Yunhong Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]},{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunhong Wang","raw_affiliation_strings":["Beihang University,Hangzhou Innovation Institute,Hangzhou,China","Hangzhou Innovation Institute, Beihang University, Hangzhou, China","State Key Laboratory of Virtual Reality Technology and Systems, Beihang University, Beijing, China","Shanghai Artificial Intelligence Laboratory, Shanghai, China","School of Computer Science and Engineering, Beihang University, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beihang University,Hangzhou Innovation Institute,Hangzhou,China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"Hangzhou Innovation Institute, Beihang University, Hangzhou, China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"State Key Laboratory of Virtual Reality Technology and Systems, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"Shanghai Artificial Intelligence Laboratory, Shanghai, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4391012619"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.02412794,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2720","last_page":"2724"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.9850000143051147,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.98089998960495,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8098196983337402},{"id":"https://openalex.org/keywords/spell","display_name":"Spell","score":0.7521324157714844},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.5946234464645386},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5798981189727783},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5730772018432617},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5378578901290894},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4977550804615021},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4403207302093506},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.42263099551200867},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.4150164723396301},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3274202346801758},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.1205657422542572}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8098196983337402},{"id":"https://openalex.org/C2780957641","wikidata":"https://www.wikidata.org/wiki/Q1999796","display_name":"Spell","level":2,"score":0.7521324157714844},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.5946234464645386},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5798981189727783},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5730772018432617},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5378578901290894},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4977550804615021},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4403207302093506},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.42263099551200867},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.4150164723396301},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3274202346801758},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.1205657422542572},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10446176","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp48485.2024.10446176","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1832166106","display_name":null,"funder_award_id":"LQ23F020024","funder_id":"https://openalex.org/F4320338464","funder_display_name":"Natural Science Foundation of Zhejiang Province"},{"id":"https://openalex.org/G3642141518","display_name":null,"funder_award_id":"20230484297","funder_id":"https://openalex.org/F4320334978","funder_display_name":"Beijing Nova Program"},{"id":"https://openalex.org/G7348502280","display_name":null,"funder_award_id":"62276018","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8934315408","display_name":null,"funder_award_id":"62302031","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320326444","display_name":"Nova","ror":null},{"id":"https://openalex.org/F4320334978","display_name":"Beijing Nova Program","ror":"https://ror.org/034k14f91"},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null},{"id":"https://openalex.org/F4320338464","display_name":"Natural Science Foundation of Zhejiang Province","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W1491389626","https://openalex.org/W1922126009","https://openalex.org/W1971822075","https://openalex.org/W1981283549","https://openalex.org/W1998042868","https://openalex.org/W2008806374","https://openalex.org/W2144554289","https://openalex.org/W2146835493","https://openalex.org/W2343052201","https://openalex.org/W2997749585","https://openalex.org/W3015415468","https://openalex.org/W3015688946","https://openalex.org/W3035449864","https://openalex.org/W3175618949","https://openalex.org/W3202415716","https://openalex.org/W4226329156","https://openalex.org/W4229030834","https://openalex.org/W4283819468","https://openalex.org/W4312787220","https://openalex.org/W4312843595","https://openalex.org/W4312879041","https://openalex.org/W4385245566","https://openalex.org/W6629590909","https://openalex.org/W6784333009"],"related_works":["https://openalex.org/W4287256643","https://openalex.org/W2745258745","https://openalex.org/W4296626305","https://openalex.org/W4254978807","https://openalex.org/W4287269020","https://openalex.org/W4307454463","https://openalex.org/W4287201819","https://openalex.org/W4252852831","https://openalex.org/W4233609247","https://openalex.org/W4387904771"],"abstract_inverted_index":{"Scene":[0],"Text":[1],"Recognition":[2],"(STR)":[3],"has":[4,82],"long":[5],"been":[6],"considered":[7],"an":[8],"important":[9],"yet":[10,91],"challenging":[11],"task":[12],"in":[13,88,113,142,162],"the":[14,30,41,49,84,104,114,123,163],"field":[15],"of":[16,43,78,86,116],"computer":[17],"vision.":[18],"Recent":[19],"works":[20],"have":[21,61],"demonstrated":[22],"that":[23],"utilizing":[24],"language":[25,44,67,81,130,140],"information":[26,45,141],"is":[27,93],"effective":[28,145],"for":[29,96],"visually":[31],"difficult":[32],"images,":[33],"like":[34],"ones":[35],"with":[36,127,150,159],"occultation":[37],"or":[38],"blurring.":[39],"However,":[40],"use":[42],"sometimes":[46],"leads":[47],"to":[48,63,66,74,154],"over-correction":[50],"problem.":[51],"For":[52],"out-of-vocabulary":[53],"samples":[54],"(e.g.":[55,71],"\"hou\"":[56,73],"and":[57,69,80,108,119,129,139,156],"\"0x4a\"),":[58],"some":[59],"methods":[60,161],"tended":[62],"be":[64],"biased":[65],"side":[68],"over-corrected":[70],"over-correct":[72],"\"hot\").":[75],"This":[76],"imbalance":[77],"vision":[79,128,138],"limited":[83],"usage":[85],"models":[87],"practical":[89],"scenarios,":[90],"it":[92],"rarely":[94],"occurs":[95],"human.":[97],"To":[98],"address":[99],"this":[100,133],"issue,":[101],"we":[102],"rethink":[103],"human\u2019s":[105],"recognition":[106,124],"process":[107,125],"propose":[109],"a":[110,143],"model":[111,136],"behaving":[112],"order":[115],"\"Read,":[117],"Spell":[118],"Repeat\".":[120],"It":[121],"refines":[122],"circularly":[126],"information.":[131],"With":[132],"mechanism,":[134],"our":[135],"integrates":[137],"more":[144],"manner,":[146],"achieving":[147],"higher":[148],"accuracy":[149],"less":[151],"parameters":[152],"compared":[153],"baseline":[155],"competitive":[157],"performance":[158],"SOTA":[160],"standard":[164],"benchmarks.":[165]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
