{"id":"https://openalex.org/W4392903474","doi":"https://doi.org/10.1109/icassp48485.2024.10447585","title":"Recognition-Guided Diffusion Model for Scene Text Image Super-Resolution","display_name":"Recognition-Guided Diffusion Model for Scene Text Image Super-Resolution","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392903474","doi":"https://doi.org/10.1109/icassp48485.2024.10447585"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10447585","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447585","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101498126","display_name":"Yuxuan Zhou","orcid":"https://orcid.org/0000-0002-7688-803X"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yuxuan Zhou","raw_affiliation_strings":["Wangxuan Institute of Computer Technology, Peking University,Beijing,China","Wangxuan Institute of Computer Technology, Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Wangxuan Institute of Computer Technology, Peking University,Beijing,China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Wangxuan Institute of Computer Technology, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004474713","display_name":"Liangcai Gao","orcid":"https://orcid.org/0000-0001-9444-1568"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liangcai Gao","raw_affiliation_strings":["Wangxuan Institute of Computer Technology, Peking University,Beijing,China","Wangxuan Institute of Computer Technology, Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Wangxuan Institute of Computer Technology, Peking University,Beijing,China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Wangxuan Institute of Computer Technology, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103175239","display_name":"Zhi Tang","orcid":"https://orcid.org/0000-0002-6021-8357"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhi Tang","raw_affiliation_strings":["Wangxuan Institute of Computer Technology, Peking University,Beijing,China","Wangxuan Institute of Computer Technology, Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Wangxuan Institute of Computer Technology, Peking University,Beijing,China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Wangxuan Institute of Computer Technology, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5072974615","display_name":"Baole Wei","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Baole Wei","raw_affiliation_strings":["Wangxuan Institute of Computer Technology, Peking University,Beijing,China","Wangxuan Institute of Computer Technology, Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Wangxuan Institute of Computer Technology, Peking University,Beijing,China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Wangxuan Institute of Computer Technology, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101498126"],"corresponding_institution_ids":["https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":2.6909,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.90949376,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"2940","last_page":"2944"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11105","display_name":"Advanced Image Processing Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11105","display_name":"Advanced Image Processing Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10688","display_name":"Image and Signal Denoising Methods","score":0.98580002784729,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13114","display_name":"Image Processing Techniques and Applications","score":0.9832000136375427,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7721008658409119},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7405364513397217},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.7332795262336731},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.6286118030548096},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5099161863327026},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.4821628928184509},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.48160240054130554},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.429523766040802},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.42415738105773926},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.41464075446128845}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7721008658409119},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7405364513397217},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.7332795262336731},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.6286118030548096},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5099161863327026},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.4821628928184509},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.48160240054130554},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.429523766040802},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.42415738105773926},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.41464075446128845},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10447585","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447585","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1121271761","display_name":null,"funder_award_id":"Program","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5939423041","display_name":null,"funder_award_id":"Technology","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8513333314","display_name":null,"funder_award_id":"2021ZD01","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1885185971","https://openalex.org/W2194187530","https://openalex.org/W2810983211","https://openalex.org/W2963526661","https://openalex.org/W3107840264","https://openalex.org/W3155072588","https://openalex.org/W3168053944","https://openalex.org/W3180355996","https://openalex.org/W4212774754","https://openalex.org/W4226014430","https://openalex.org/W4226125322","https://openalex.org/W4226544595","https://openalex.org/W4285606401","https://openalex.org/W4312879041","https://openalex.org/W4312923907","https://openalex.org/W4312933868","https://openalex.org/W4382240051","https://openalex.org/W4382449827","https://openalex.org/W4396711834","https://openalex.org/W6687615561","https://openalex.org/W6783713337","https://openalex.org/W6788990321","https://openalex.org/W6810940779","https://openalex.org/W6811524624","https://openalex.org/W6838639034","https://openalex.org/W6855995626"],"related_works":["https://openalex.org/W4396941953","https://openalex.org/W2093104230","https://openalex.org/W2987280934","https://openalex.org/W4390874210","https://openalex.org/W4384918963","https://openalex.org/W4365211920","https://openalex.org/W2128027845","https://openalex.org/W3014948380","https://openalex.org/W4386184937","https://openalex.org/W4394728283"],"abstract_inverted_index":{"Scene":[0,23],"Text":[1,24],"Image":[2],"Super-Resolution":[3],"(STISR)":[4],"aims":[5],"to":[6,43,58,106],"enhance":[7],"the":[8,108,120,124],"resolution":[9],"and":[10,93,137],"legibility":[11],"of":[12,40,126],"text":[13,41,85,134],"within":[14],"low-resolution":[15],"(LR)":[16],"images,":[17,56],"consequently":[18],"elevating":[19],"recognition":[20,135],"accuracy":[21,136],"in":[22,96,132],"Recognition":[25],"(STR).":[26],"Previous":[27],"methods":[28,131],"predominantly":[29],"employ":[30],"discriminative":[31],"Convolutional":[32],"Neural":[33],"Networks":[34],"(CNNs)":[35],"augmented":[36],"with":[37,53],"diverse":[38],"forms":[39],"guidance":[42],"address":[44],"this":[45],"issue.":[46],"Nevertheless,":[47],"they":[48],"remain":[49],"deficient":[50],"when":[51,63],"confronted":[52],"severely":[54],"blurred":[55],"due":[57],"their":[59],"insufficient":[60],"generation":[61],"capability":[62],"little":[64],"structural":[65],"or":[66],"semantic":[67,116],"information":[68],"can":[69],"be":[70],"extracted":[71],"from":[72],"original":[73],"images.":[74],"Therefore,":[75],"we":[76,100],"introduce":[77],"RGDiffSR,":[78],"a":[79,102],"Recognition-Guided":[80,103],"Diffusion":[81],"model":[82,110],"for":[83],"scene":[84],"image":[86,138],"Super-Resolution,":[87],"which":[88],"exhibits":[89],"great":[90],"generative":[91],"diversity":[92],"fidelity":[94],"even":[95],"challenging":[97],"scenarios.":[98],"Moreover,":[99],"propose":[101],"Denoising":[104],"Network,":[105],"guide":[107],"diffusion":[109],"generating":[111],"LR-consistent":[112],"results":[113],"through":[114],"succinct":[115],"guidance.":[117],"Experiments":[118],"on":[119],"TextZoom":[121],"dataset":[122],"demonstrate":[123],"superiority":[125],"RGDiffSR":[127],"over":[128],"prior":[129],"state-of-the-art":[130],"both":[133],"fidelity.":[139]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":4}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
