{"id":"https://openalex.org/W4405842247","doi":"https://doi.org/10.1016/j.neucom.2024.129241","title":"QT-TextSR: Enhancing scene text image super-resolution via efficient interaction with text recognition using a Query-aware Transformer","display_name":"QT-TextSR: Enhancing scene text image super-resolution via efficient interaction with text recognition using a Query-aware Transformer","publication_year":2024,"publication_date":"2024-12-28","ids":{"openalex":"https://openalex.org/W4405842247","doi":"https://doi.org/10.1016/j.neucom.2024.129241"},"language":"en","primary_location":{"id":"doi:10.1016/j.neucom.2024.129241","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.neucom.2024.129241","pdf_url":null,"source":{"id":"https://openalex.org/S45693802","display_name":"Neurocomputing","issn_l":"0925-2312","issn":["0925-2312","1872-8286"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neurocomputing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1016/j.neucom.2024.129241","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114950344","display_name":"Chongyu Liu","orcid":"https://orcid.org/0000-0003-3648-9170"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]},{"id":"https://openalex.org/I917184967","display_name":"Bank of China","ror":"https://ror.org/02mt4s337","country_code":"CN","type":"other","lineage":["https://openalex.org/I917184967"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chongyu Liu","raw_affiliation_strings":["GRG Banking, Guangzhou, China","School of Electronic and Information Engineering, South China University of Technology, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"GRG Banking, Guangzhou, China","institution_ids":["https://openalex.org/I917184967"]},{"raw_affiliation_string":"School of Electronic and Information Engineering, South China University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020445890","display_name":"Qing Jiang","orcid":"https://orcid.org/0000-0003-0660-596X"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qing Jiang","raw_affiliation_strings":["School of Electronic and Information Engineering, South China University of Technology, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic and Information Engineering, South China University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012042767","display_name":"Dezhi Peng","orcid":"https://orcid.org/0000-0002-3263-3449"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dezhi Peng","raw_affiliation_strings":["School of Electronic and Information Engineering, South China University of Technology, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic and Information Engineering, South China University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057516305","display_name":"Yuxin Kong","orcid":"https://orcid.org/0009-0002-6863-9786"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuxin Kong","raw_affiliation_strings":["School of Electronic and Information Engineering, South China University of Technology, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic and Information Engineering, South China University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030985040","display_name":"Jianyi Zhang","orcid":"https://orcid.org/0000-0002-3955-6554"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiaixin Zhang","raw_affiliation_strings":["School of Electronic and Information Engineering, South China University of Technology, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic and Information Engineering, South China University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034348457","display_name":"Longfei Xiong","orcid":null},"institutions":[{"id":"https://openalex.org/I4210108461","display_name":"Kingsoft (China)","ror":"https://ror.org/01stnfn33","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210108461"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Longfei Xiong","raw_affiliation_strings":["Document AI Department, Kingsoft, Zhuhai, China"],"affiliations":[{"raw_affiliation_string":"Document AI Department, Kingsoft, Zhuhai, China","institution_ids":["https://openalex.org/I4210108461"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061035312","display_name":"Jiangyong Duan","orcid":"https://orcid.org/0000-0002-5095-2339"},"institutions":[{"id":"https://openalex.org/I4210108461","display_name":"Kingsoft (China)","ror":"https://ror.org/01stnfn33","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210108461"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiwei Duan","raw_affiliation_strings":["Document AI Department, Kingsoft, Zhuhai, China"],"affiliations":[{"raw_affiliation_string":"Document AI Department, Kingsoft, Zhuhai, China","institution_ids":["https://openalex.org/I4210108461"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100771484","display_name":"Cheng Sun","orcid":"https://orcid.org/0000-0003-4030-9355"},"institutions":[{"id":"https://openalex.org/I4210108461","display_name":"Kingsoft (China)","ror":"https://ror.org/01stnfn33","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210108461"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Cheng Sun","raw_affiliation_strings":["Document AI Department, Kingsoft, Zhuhai, China"],"affiliations":[{"raw_affiliation_string":"Document AI Department, Kingsoft, Zhuhai, China","institution_ids":["https://openalex.org/I4210108461"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5080674767","display_name":"Lianwen Jin","orcid":"https://orcid.org/0000-0002-5456-0957"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lianwen Jin","raw_affiliation_strings":["School of Electronic and Information Engineering, South China University of Technology, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic and Information Engineering, South China University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I90610280"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5080674767"],"corresponding_institution_ids":["https://openalex.org/I90610280"],"apc_list":{"value":2470,"currency":"USD","value_usd":2470},"apc_paid":{"value":2470,"currency":"USD","value_usd":2470},"fwci":1.579,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.85294337,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"620","issue":null,"first_page":"129241","last_page":"129241"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12357","display_name":"Digital Media Forensic Detection","score":0.9919000267982483,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13114","display_name":"Image Processing Techniques and Applications","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.749031126499176},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6681357026100159},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5904946327209473},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5063618421554565},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.45564043521881104},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.062290072441101074}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.749031126499176},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6681357026100159},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5904946327209473},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5063618421554565},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.45564043521881104},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.062290072441101074},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1016/j.neucom.2024.129241","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.neucom.2024.129241","pdf_url":null,"source":{"id":"https://openalex.org/S45693802","display_name":"Neurocomputing","issn_l":"0925-2312","issn":["0925-2312","1872-8286"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neurocomputing","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1016/j.neucom.2024.129241","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.neucom.2024.129241","pdf_url":null,"source":{"id":"https://openalex.org/S45693802","display_name":"Neurocomputing","issn_l":"0925-2312","issn":["0925-2312","1872-8286"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neurocomputing","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5270964542","display_name":null,"funder_award_id":"62441604","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6561198578","display_name":null,"funder_award_id":"62476093","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":75,"referenced_works":["https://openalex.org/W1885185971","https://openalex.org/W1971822075","https://openalex.org/W1981283549","https://openalex.org/W2133665775","https://openalex.org/W2194187530","https://openalex.org/W2810983211","https://openalex.org/W2908510526","https://openalex.org/W2963526661","https://openalex.org/W2980487166","https://openalex.org/W2994189733","https://openalex.org/W3040135872","https://openalex.org/W3082245115","https://openalex.org/W3083321694","https://openalex.org/W3157758108","https://openalex.org/W3164382044","https://openalex.org/W3174746398","https://openalex.org/W4365807615","https://openalex.org/W4382240051","https://openalex.org/W4385249955","https://openalex.org/W4387967900","https://openalex.org/W4387968993","https://openalex.org/W4392005811","https://openalex.org/W4392939417","https://openalex.org/W6629590909","https://openalex.org/W6649973027","https://openalex.org/W6652761251","https://openalex.org/W6681452975","https://openalex.org/W6681875759","https://openalex.org/W6702130928","https://openalex.org/W6704491142","https://openalex.org/W6720904035","https://openalex.org/W6725739302","https://openalex.org/W6727340567","https://openalex.org/W6736904528","https://openalex.org/W6739901393","https://openalex.org/W6741365857","https://openalex.org/W6747213469","https://openalex.org/W6749094794","https://openalex.org/W6753074096","https://openalex.org/W6757146666","https://openalex.org/W6762955845","https://openalex.org/W6764873646","https://openalex.org/W6769142534","https://openalex.org/W6777691785","https://openalex.org/W6779823529","https://openalex.org/W6782282802","https://openalex.org/W6784333009","https://openalex.org/W6786654440","https://openalex.org/W6791216369","https://openalex.org/W6792155083","https://openalex.org/W6797399245","https://openalex.org/W6798036263","https://openalex.org/W6798557865","https://openalex.org/W6798837711","https://openalex.org/W6800689796","https://openalex.org/W6801128355","https://openalex.org/W6802542124","https://openalex.org/W6803870738","https://openalex.org/W6804749718","https://openalex.org/W6810590372","https://openalex.org/W6811105506","https://openalex.org/W6811524624","https://openalex.org/W6839037898","https://openalex.org/W6839994690","https://openalex.org/W6845823181","https://openalex.org/W6847208268","https://openalex.org/W6850917252","https://openalex.org/W6851876447","https://openalex.org/W6854271032","https://openalex.org/W6854511844","https://openalex.org/W6855223112","https://openalex.org/W6856028325","https://openalex.org/W6857781186","https://openalex.org/W6858054068","https://openalex.org/W6860396999"],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"Scene":[0],"text":[1,18,27,37,41,48,72,80,86,96,101,120,123,144,162,236,243,248,251],"image":[2,73,97,124,145,237,252,269],"super-resolution":[3,32,74,98,125,238,253],"(STISR)":[4],"has":[5],"obtained":[6],"widespread":[7],"attention":[8],"in":[9,193],"recent":[10],"years":[11],"due":[12],"to":[13,16,25,82,152],"its":[14],"ability":[15],"enhance":[17,161],"recognition":[19,49,54,81,102,121,149,158,249],"performance.":[20,59],"Many":[21],"previous":[22,190],"methods":[23,192],"proposed":[24],"incorporate":[26],"prior":[28,170],"knowledge":[29],"into":[30,103,254],"the":[31,52,84,136,142,148,154,194],"architecture":[33],"for":[34,219,235,267],"reconstructing":[35],"high-quality":[36],"images.":[38],"However,":[39],"these":[40],"priors":[42,266],"are":[43],"typically":[44],"derived":[45],"from":[46],"pretrained":[47],"models,":[50],"and":[51,99,116,122,160,210,250,277],"inaccurate":[53,85],"feedback":[55,87],"will":[56],"hinder":[57],"overall":[58],"In":[60],"this":[61],"paper,":[62],"we":[63,165,286],"propose":[64,228,287],"a":[65,89,109,167,229,288],"novel":[66],"model,":[67],"QT-TextSR,":[68,285],"which":[69,263],"promotes":[70],"scene":[71,95,100],"by":[75,157,297],"introducing":[76,298],"efficient":[77,232],"interaction":[78,241],"with":[79,242],"release":[83],"through":[88,171],"Query-aware":[90,233],"Transformer.":[91],"Specifically,":[92],"QT-TextSR":[93,139,187,220,246],"decomposes":[94,247],"different":[104],"sets":[105,256,300],"of":[106,196,257,292,301],"queries":[107,258],"within":[108,259],"Vision-Language":[110,260],"Cooperation":[111,261],"Module,":[112,262],"explicitly":[113],"modeling":[114],"discriminative":[115],"interactive":[117],"features":[118],"between":[119],"tasks.":[126],"By":[127],"employing":[128],"two":[129],"separate":[130],"yet":[131,231],"simultaneous":[132],"projection":[133],"heads":[134],"on":[135,180,275,284],"corresponding":[137],"features,":[138],"can":[140],"recover":[141],"low-quality":[143],"meanwhile":[146],"obtain":[147],"results.":[150],"Additionally,":[151],"mitigate":[153],"limitations":[155],"caused":[156],"errors":[159],"structure":[163],"preservation,":[164],"introduce":[166],"strong":[168],"texture":[169],"self-supervised":[172],"pre-training,":[173],"leveraging":[174],"visual":[175],"cues":[176],"more":[177,299],"effectively.":[178],"Experiments":[179],"public":[181],"dataset,":[182],"TextZoom":[183,276],"demonstrate":[184],"that":[185],"our":[186],"significantly":[188],"outperforms":[189],"state-of-the-art":[191],"metrics":[195],"Recognition":[197],"Accuracy":[198],"(68%":[199],"v":[200,206,213],"s":[201,207,214],".":[202,208,215,225],"65.5%),":[203],"PSNR":[204],"(22.51":[205],"22.10),":[209],"SSIM":[211],"(0.7960":[212],"7930).":[216],"The":[217],"code":[218],"is":[221],"available":[222],"at":[223],"https://github.com/lcy0604/QT-TextSR":[224],"\u2022":[226,245,282],"We":[227],"simple":[230],"Transformer":[234],"via":[239],"effective":[240],"recognition.":[244],"distinct":[255],"leverages":[264],"semantic":[265],"better":[268],"recovery.":[270],"It":[271],"achieves":[272],"SOTA":[273],"results":[274],"show":[278],"good":[279],"generalization":[280],"ability.":[281],"Based":[283],"unified":[289],"model":[290],"capable":[291],"handling":[293],"multiple":[294],"OCR":[295],"tasks":[296],"queries.":[302]},"counts_by_year":[{"year":2025,"cited_by_count":6}],"updated_date":"2026-01-19T04:01:09.351973","created_date":"2025-10-10T00:00:00"}
