{"id":"https://openalex.org/W4392980188","doi":"https://doi.org/10.1109/tpami.2024.3379828","title":"Turning a CLIP Model Into a Scene Text Spotter","display_name":"Turning a CLIP Model Into a Scene Text Spotter","publication_year":2024,"publication_date":"2024-03-20","ids":{"openalex":"https://openalex.org/W4392980188","doi":"https://doi.org/10.1109/tpami.2024.3379828","pmid":"https://pubmed.ncbi.nlm.nih.gov/38507385"},"language":"en","primary_location":{"id":"doi:10.1109/tpami.2024.3379828","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2024.3379828","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007178447","display_name":"Wenwen Yu","orcid":"https://orcid.org/0000-0003-4666-3646"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wenwen Yu","raw_affiliation_strings":["School of Artificial Intelligence and Automation, Huazhong University of Science and Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence and Automation, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100389396","display_name":"Yuliang Liu","orcid":"https://orcid.org/0000-0002-3037-173X"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuliang Liu","raw_affiliation_strings":["School of Artificial Intelligence and Automation, Huazhong University of Science and Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence and Automation, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026660601","display_name":"Xingkui Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xingkui Zhu","raw_affiliation_strings":["School of Artificial Intelligence and Automation, Huazhong University of Science and Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence and Automation, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024986567","display_name":"Haoyu Cao","orcid":"https://orcid.org/0000-0002-3789-9705"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haoyu Cao","raw_affiliation_strings":["Tencent, YouTu Lab, Hefei, China"],"affiliations":[{"raw_affiliation_string":"Tencent, YouTu Lab, Hefei, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004402130","display_name":"Xing Sun","orcid":"https://orcid.org/0000-0001-8132-9083"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xing Sun","raw_affiliation_strings":["Tencent, YouTu Lab, Hefei, China"],"affiliations":[{"raw_affiliation_string":"Tencent, YouTu Lab, Hefei, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039363991","display_name":"Xiang Bai","orcid":"https://orcid.org/0000-0002-3449-5940"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiang Bai","raw_affiliation_strings":["School of Artificial Intelligence and Automation, Huazhong University of Science and Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence and Automation, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5007178447"],"corresponding_institution_ids":["https://openalex.org/I47720641"],"apc_list":null,"apc_paid":null,"fwci":4.7273,"has_fulltext":false,"cited_by_count":19,"citation_normalized_percentile":{"value":0.95921062,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"46","issue":"9","first_page":"6040","last_page":"6054"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.991599977016449,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7101097106933594},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6666057109832764},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5342705845832825},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4234147369861603},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3983081579208374},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.37753215432167053},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3487395644187927}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7101097106933594},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6666057109832764},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5342705845832825},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4234147369861603},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3983081579208374},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.37753215432167053},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3487395644187927}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tpami.2024.3379828","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2024.3379828","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},{"id":"pmid:38507385","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/38507385","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on pattern analysis and machine intelligence","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2360553055","display_name":null,"funder_award_id":"2022YFC2305102","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G3596061624","display_name":null,"funder_award_id":"62206103","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4189157326","display_name":null,"funder_award_id":"62225603","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":97,"referenced_works":["https://openalex.org/W1972065312","https://openalex.org/W2008806374","https://openalex.org/W2144554289","https://openalex.org/W2194775991","https://openalex.org/W2339589954","https://openalex.org/W2343052201","https://openalex.org/W2515655118","https://openalex.org/W2550687635","https://openalex.org/W2604243686","https://openalex.org/W2604735854","https://openalex.org/W2605076167","https://openalex.org/W2605982830","https://openalex.org/W2777652944","https://openalex.org/W2785383245","https://openalex.org/W2810028092","https://openalex.org/W2875814315","https://openalex.org/W2902494497","https://openalex.org/W2914492226","https://openalex.org/W2962749812","https://openalex.org/W2962793481","https://openalex.org/W2962986948","https://openalex.org/W2963351448","https://openalex.org/W2963353821","https://openalex.org/W2963977642","https://openalex.org/W2964018263","https://openalex.org/W2964082390","https://openalex.org/W2964296749","https://openalex.org/W2965463054","https://openalex.org/W2967615747","https://openalex.org/W2968226676","https://openalex.org/W2970476646","https://openalex.org/W2970910956","https://openalex.org/W2981969038","https://openalex.org/W2982770724","https://openalex.org/W2983626510","https://openalex.org/W2987563462","https://openalex.org/W2988098900","https://openalex.org/W2991626090","https://openalex.org/W2996956254","https://openalex.org/W2997371611","https://openalex.org/W2998621280","https://openalex.org/W3002942143","https://openalex.org/W3003868038","https://openalex.org/W3003921261","https://openalex.org/W3034514377","https://openalex.org/W3034792612","https://openalex.org/W3035222584","https://openalex.org/W3035396860","https://openalex.org/W3035679705","https://openalex.org/W3092244643","https://openalex.org/W3093319256","https://openalex.org/W3096609285","https://openalex.org/W3097932944","https://openalex.org/W3106879600","https://openalex.org/W3110398855","https://openalex.org/W3111172959","https://openalex.org/W3134095442","https://openalex.org/W3152831436","https://openalex.org/W3159307593","https://openalex.org/W3171030392","https://openalex.org/W3172799005","https://openalex.org/W3179426054","https://openalex.org/W3179897446","https://openalex.org/W3181016597","https://openalex.org/W3184364189","https://openalex.org/W3184923351","https://openalex.org/W3186906052","https://openalex.org/W3196976036","https://openalex.org/W3198377975","https://openalex.org/W4214922754","https://openalex.org/W4225683503","https://openalex.org/W4226058394","https://openalex.org/W4281250886","https://openalex.org/W4304091583","https://openalex.org/W4310335763","https://openalex.org/W4312230431","https://openalex.org/W4312310776","https://openalex.org/W4312351507","https://openalex.org/W4312458986","https://openalex.org/W4312593844","https://openalex.org/W4312880451","https://openalex.org/W4312988011","https://openalex.org/W4313006333","https://openalex.org/W4313172891","https://openalex.org/W4380839071","https://openalex.org/W4382464147","https://openalex.org/W4385245566","https://openalex.org/W4386065412","https://openalex.org/W4386075667","https://openalex.org/W6620707391","https://openalex.org/W6726347659","https://openalex.org/W6752534923","https://openalex.org/W6784094891","https://openalex.org/W6788870397","https://openalex.org/W6791353385","https://openalex.org/W6796744688","https://openalex.org/W6802517928"],"related_works":["https://openalex.org/W2755342338","https://openalex.org/W2775347418","https://openalex.org/W2779427294","https://openalex.org/W2625805835","https://openalex.org/W2079911747","https://openalex.org/W3116076068","https://openalex.org/W2069885731","https://openalex.org/W2563206327","https://openalex.org/W2108687567","https://openalex.org/W2789220062"],"abstract_inverted_index":{"We":[0],"exploit":[1],"the":[2,5,57,111,148,181,187],"potential":[3],"of":[4,103,119,147,157],"large-scale":[6],"Contrastive":[7],"Language-Image":[8],"Pretraining":[9],"(CLIP)":[10],"model":[11],"to":[12,37,55],"enhance":[13,56,92],"scene":[14],"text":[15,62,66,94,124,162,175],"detection":[16,125,163,176],"and":[17,33,40,46,61,82,96,105,121,126,159,164,177,186],"spotting":[18,127,165,178],"tasks,":[19,128,166],"transforming":[20],"it":[21],"into":[22],"a":[23,131],"robust":[24,140],"backbone,":[25,114],"FastTCM-CR50.":[26],"This":[27],"backbone":[28],"utilizes":[29],"visual":[30],"prompt":[31,77],"learning":[32],"cross-attention":[34],"in":[35,123,134],"CLIP":[36],"extract":[38],"image":[39,60],"text-based":[41],"prior":[42],"knowledge.":[43],"Using":[44],"predefined":[45],"learnable":[47],"prompts,":[48],"FastTCM-CR50":[49,85,151],"introduces":[50],"an":[51,101,116,155],"instance-language":[52],"matching":[53],"process":[54],"synergy":[58],"between":[59],"embeddings,":[63],"thereby":[64],"refining":[65],"regions.":[67],"Our":[68],"Bimodal":[69],"Similarity":[70],"Matching":[71],"(BSM)":[72],"module":[73],"facilitates":[74],"dynamic":[75],"language":[76],"generation,":[78],"enabling":[79],"offline":[80],"computations":[81],"improving":[83,98],"performance.":[84],"offers":[86],"several":[87],"advantages:":[88],"1)":[89],"It":[90,109,138,169],"can":[91],"existing":[93],"detectors":[95],"spotters,":[97],"performance":[99,153,172],"by":[100,154],"average":[102,117,156],"1.6%":[104],"1.5%,":[106],"respectively.":[107,167],"2)":[108],"outperforms":[110],"previous":[112],"TCM-CR50":[113],"yielding":[115],"improvement":[118],"0.2%":[120],"0.55%":[122],"along":[129],"with":[130],"47.1%":[132],"increase":[133],"inference":[135],"speed.":[136],"3)":[137],"showcases":[139],"few-shot":[141],"training":[142],"capabilities.":[143],"Utilizing":[144],"only":[145],"10%":[146],"supervised":[149],"data,":[150],"improves":[152],"26.5%":[158],"4.7%":[160],"for":[161,190],"4)":[168],"consistently":[170],"enhances":[171],"on":[173],"out-of-distribution":[174],"datasets,":[179],"particularly":[180],"NightTime-ArT":[182],"subset":[183],"from":[184],"ICDAR2019-ArT":[185],"DOTA":[188],"dataset":[189],"oriented":[191],"object":[192],"detection.":[193]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":12},{"year":2024,"cited_by_count":5}],"updated_date":"2026-03-31T07:56:22.981413","created_date":"2025-10-10T00:00:00"}
