{"id":"https://openalex.org/W4415707936","doi":"https://doi.org/10.1109/icme59968.2025.11209851","title":"Scene Text Image Super-Resolution with Visual Text Cues Transfer and Enhancement","display_name":"Scene Text Image Super-Resolution with Visual Text Cues Transfer and Enhancement","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4415707936","doi":"https://doi.org/10.1109/icme59968.2025.11209851"},"language":null,"primary_location":{"id":"doi:10.1109/icme59968.2025.11209851","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11209851","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101969639","display_name":"Ming-Jun Li","orcid":"https://orcid.org/0009-0008-8831-1191"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Mingjun Li","raw_affiliation_strings":["Nanjing University,State Key Laboratory for Novel Software Technology,Nanjing,China"],"affiliations":[{"raw_affiliation_string":"Nanjing University,State Key Laboratory for Novel Software Technology,Nanjing,China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114987075","display_name":"Zeming Zhuang","orcid":null},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zeming Zhuang","raw_affiliation_strings":["Nanjing University,State Key Laboratory for Novel Software Technology,Nanjing,China"],"affiliations":[{"raw_affiliation_string":"Nanjing University,State Key Laboratory for Novel Software Technology,Nanjing,China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5006378862","display_name":"Feng Su","orcid":"https://orcid.org/0000-0002-8426-9634"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Feng Su","raw_affiliation_strings":["Nanjing University,State Key Laboratory for Novel Software Technology,Nanjing,China"],"affiliations":[{"raw_affiliation_string":"Nanjing University,State Key Laboratory for Novel Software Technology,Nanjing,China","institution_ids":["https://openalex.org/I881766915"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101969639"],"corresponding_institution_ids":["https://openalex.org/I881766915"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.33678,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11105","display_name":"Advanced Image Processing Techniques","score":0.9700000286102295,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11105","display_name":"Advanced Image Processing Techniques","score":0.9700000286102295,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11165","display_name":"Image and Video Quality Assessment","score":0.01119999960064888,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.003599999938160181,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6345000267028809},{"id":"https://openalex.org/keywords/noisy-text-analytics","display_name":"Noisy text analytics","score":0.5551999807357788},{"id":"https://openalex.org/keywords/readability","display_name":"Readability","score":0.545799970626831},{"id":"https://openalex.org/keywords/distortion","display_name":"Distortion (music)","score":0.5454000234603882},{"id":"https://openalex.org/keywords/text-detection","display_name":"Text detection","score":0.49720001220703125},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.46549999713897705},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.4569000005722046},{"id":"https://openalex.org/keywords/fuse","display_name":"Fuse (electrical)","score":0.44279998540878296},{"id":"https://openalex.org/keywords/text-recognition","display_name":"Text recognition","score":0.43160000443458557}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7670000195503235},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6525999903678894},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6345000267028809},{"id":"https://openalex.org/C151375590","wikidata":"https://www.wikidata.org/wiki/Q17147076","display_name":"Noisy text analytics","level":4,"score":0.5551999807357788},{"id":"https://openalex.org/C2778143727","wikidata":"https://www.wikidata.org/wiki/Q1820650","display_name":"Readability","level":2,"score":0.545799970626831},{"id":"https://openalex.org/C126780896","wikidata":"https://www.wikidata.org/wiki/Q899871","display_name":"Distortion (music)","level":4,"score":0.5454000234603882},{"id":"https://openalex.org/C2983589003","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Text detection","level":3,"score":0.49720001220703125},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4781000018119812},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.46549999713897705},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.4569000005722046},{"id":"https://openalex.org/C141353440","wikidata":"https://www.wikidata.org/wiki/Q182221","display_name":"Fuse (electrical)","level":2,"score":0.44279998540878296},{"id":"https://openalex.org/C2983812711","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Text recognition","level":3,"score":0.43160000443458557},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.3772999942302704},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.37529999017715454},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.34940001368522644},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.3416999876499176},{"id":"https://openalex.org/C59415355","wikidata":"https://www.wikidata.org/wiki/Q3484781","display_name":"Text simplification","level":3,"score":0.3188999891281128},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.30480000376701355},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.30379998683929443},{"id":"https://openalex.org/C55020928","wikidata":"https://www.wikidata.org/wiki/Q3813865","display_name":"Image quality","level":3,"score":0.30250000953674316},{"id":"https://openalex.org/C2777146004","wikidata":"https://www.wikidata.org/wiki/Q14949826","display_name":"CLARITY","level":2,"score":0.29649999737739563},{"id":"https://openalex.org/C2776175482","wikidata":"https://www.wikidata.org/wiki/Q1195816","display_name":"Transfer (computing)","level":2,"score":0.29019999504089355},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.2883000075817108},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.2831000089645386},{"id":"https://openalex.org/C56461940","wikidata":"https://www.wikidata.org/wiki/Q970687","display_name":"Eye tracking","level":2,"score":0.25780001282691956},{"id":"https://openalex.org/C2985684807","wikidata":"https://www.wikidata.org/wiki/Q1513879","display_name":"Text generation","level":2,"score":0.2549000084400177}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icme59968.2025.11209851","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11209851","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W2194187530","https://openalex.org/W2810983211","https://openalex.org/W2963526661","https://openalex.org/W3096739052","https://openalex.org/W3107840264","https://openalex.org/W3174746398","https://openalex.org/W3177505580","https://openalex.org/W3181186176","https://openalex.org/W3206615376","https://openalex.org/W4223483730","https://openalex.org/W4225562651","https://openalex.org/W4285606401","https://openalex.org/W4312923907","https://openalex.org/W4312933868","https://openalex.org/W4382240051","https://openalex.org/W4382449827","https://openalex.org/W4385764576","https://openalex.org/W4386076368","https://openalex.org/W4387967900","https://openalex.org/W4390873988","https://openalex.org/W4392903474","https://openalex.org/W4403791742"],"related_works":[],"abstract_inverted_index":{"Scene":[0],"text":[1,13,25,34,50,59,70,79,89,98,114,121,133,144,156,171],"image":[2,71],"super-resolution":[3],"(STISR)":[4],"aims":[5],"to":[6,19,81,90,102,115,142,147],"improve":[7,53],"the":[8,12,20,29,33,54,57,88,109,112,132,152,162,167,179],"visual":[9,85,97,106],"clarity":[10],"of":[11,23,45,49,56,87,108,154,169],"in":[14,28,151],"low-resolution":[15,30,170],"scene":[16,69],"images.":[17,157],"Due":[18],"intrinsic":[21],"lack":[22],"detailed":[24],"appearance":[26],"information":[27],"input":[31,113],"image,":[32,60],"images":[35,172],"generated":[36,155],"by":[37],"most":[38],"STISR":[39,181],"methods":[40],"often":[41],"contain":[42],"varying":[43],"degrees":[44],"distortion":[46],"or":[47],"loss":[48,139],"details.":[51,122],"To":[52],"quality":[55,153],"super-resolved":[58],"we":[61,124],"propose":[62,125],"a":[63,95,148],"novel":[64,96],"Visual":[65],"Text":[66],"Cues":[67],"Transfer-based":[68],"Super-Resolution":[72],"Network":[73],"(VTCTSRN).":[74],"The":[75],"network":[76,164],"introduces":[77],"synthetic":[78],"prototypes":[80],"provide":[82],"high-resolution,":[83],"supplementary":[84],"cues":[86,99],"be":[91],"reconstructed,":[92],"and":[93,111,119,136,173],"leverages":[94],"transfer":[100],"mechanism":[101],"adaptively":[103],"fuse":[104],"complementary":[105],"characteristics":[107],"prototype":[110],"help":[116],"recover":[117],"clear":[118],"accurate":[120],"Additionally,":[123],"dynamic":[126],"attentional":[127],"sequential":[128],"recurrent":[129],"blocks":[130],"for":[131],"reconstruction":[134],"pipeline":[135],"introduce":[137],"effective":[138],"function":[140],"term":[141],"enhance":[143],"representation,":[145],"leading":[146],"marked":[149],"improvement":[150],"Experimental":[158],"results":[159],"demonstrate":[160],"that":[161],"proposed":[163],"significantly":[165],"enhances":[166],"readability":[168],"establishes":[174],"new":[175],"state-of-the-art":[176],"performance":[177],"on":[178],"mainstream":[180],"benchmark.":[182]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-30T00:00:00"}
