{"id":"https://openalex.org/W4321485374","doi":"https://doi.org/10.1145/3539597.3570428","title":"Visual Matching is Enough for Scene Text Retrieval","display_name":"Visual Matching is Enough for Scene Text Retrieval","publication_year":2023,"publication_date":"2023-02-22","ids":{"openalex":"https://openalex.org/W4321485374","doi":"https://doi.org/10.1145/3539597.3570428"},"language":"en","primary_location":{"id":"doi:10.1145/3539597.3570428","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3539597.3570428","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Sixteenth ACM International Conference on Web Search and Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056159906","display_name":"Lilong Wen","orcid":"https://orcid.org/0000-0001-9378-5932"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lilong Wen","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065505457","display_name":"Yingrong Wang","orcid":"https://orcid.org/0000-0003-1671-8589"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yingrong Wang","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011200911","display_name":"Dongxiang Zhang","orcid":"https://orcid.org/0000-0002-9964-2470"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongxiang Zhang","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100389286","display_name":"Gang Chen","orcid":"https://orcid.org/0000-0002-7483-0045"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Gang Chen","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5056159906"],"corresponding_institution_ids":["https://openalex.org/I76130692"],"apc_list":null,"apc_paid":null,"fwci":1.7627,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.86584985,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"447","last_page":"455"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8604698181152344},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7060619592666626},{"id":"https://openalex.org/keywords/glyph","display_name":"Glyph (data visualization)","score":0.5413527488708496},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.5399046540260315},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.48976874351501465},{"id":"https://openalex.org/keywords/visual-word","display_name":"Visual Word","score":0.44852226972579956},{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.4441388249397278},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.43398943543434143},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4252294600009918},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3791819214820862},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.3372630178928375},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3260490596294403},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.2864986062049866}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8604698181152344},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7060619592666626},{"id":"https://openalex.org/C142816647","wikidata":"https://www.wikidata.org/wiki/Q5573018","display_name":"Glyph (data visualization)","level":3,"score":0.5413527488708496},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.5399046540260315},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.48976874351501465},{"id":"https://openalex.org/C189391414","wikidata":"https://www.wikidata.org/wiki/Q7936579","display_name":"Visual Word","level":4,"score":0.44852226972579956},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.4441388249397278},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.43398943543434143},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4252294600009918},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3791819214820862},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3372630178928375},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3260490596294403},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.2864986062049866},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3539597.3570428","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3539597.3570428","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Sixteenth ACM International Conference on Web Search and Data Mining","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.5899999737739563,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G1242313369","display_name":null,"funder_award_id":"2022YFF0902000","funder_id":"https://openalex.org/F4320321540","funder_display_name":"Ministry of Science and Technology of the People's Republic of China"}],"funders":[{"id":"https://openalex.org/F4320321540","display_name":"Ministry of Science and Technology of the People's Republic of China","ror":"https://ror.org/027s68j25"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W1922126009","https://openalex.org/W2012689760","https://openalex.org/W2053317383","https://openalex.org/W2123410090","https://openalex.org/W2142636459","https://openalex.org/W2187089797","https://openalex.org/W2343052201","https://openalex.org/W2395611524","https://openalex.org/W2565591417","https://openalex.org/W2572730214","https://openalex.org/W2727586675","https://openalex.org/W2786459439","https://openalex.org/W2888894220","https://openalex.org/W2900564790","https://openalex.org/W2905469793","https://openalex.org/W2962986948","https://openalex.org/W2963517393","https://openalex.org/W2963712589","https://openalex.org/W2964294787","https://openalex.org/W2968226676","https://openalex.org/W2981969038","https://openalex.org/W2982148195","https://openalex.org/W2996956254","https://openalex.org/W3003868038","https://openalex.org/W3024152176","https://openalex.org/W3034447740","https://openalex.org/W3034792612","https://openalex.org/W3035679705","https://openalex.org/W3086275027","https://openalex.org/W3092309237","https://openalex.org/W3110398855","https://openalex.org/W3172793889","https://openalex.org/W3177167102","https://openalex.org/W3202415716","https://openalex.org/W4241698512"],"related_works":["https://openalex.org/W2063218608","https://openalex.org/W4386105885","https://openalex.org/W2184288218","https://openalex.org/W2947282851","https://openalex.org/W2374066281","https://openalex.org/W4387423606","https://openalex.org/W2071180033","https://openalex.org/W2036058638","https://openalex.org/W2528082075","https://openalex.org/W155590726"],"abstract_inverted_index":{"Given":[0],"a":[1,30,70,78,132,185,205],"text":[2,8,17,36,40,61,100,123,146,156,197],"query,":[3],"the":[4,16,34,38,56,60,75,105,117,126,177],"task":[5,76],"of":[6,108,152],"scene":[7,122,145],"retrieval":[9,80],"aims":[10],"at":[11],"searching":[12],"and":[13,37,62,121,148,159],"localizing":[14],"all":[15],"instances":[18],"that":[19,73,85,140,172],"are":[20,162],"contained":[21],"in":[22,42,165,180,204],"an":[23,166],"image":[24,63,135],"gallery.":[25],"The":[26,150],"state-of-the-art":[27],"method":[28,189],"learns":[29],"cross-modal":[31,50],"similarity":[32,118],"between":[33,59,119],"query":[35,99,120],"detected":[39],"regions":[41],"natural":[43],"images":[44,102],"to":[45,103,115,143,195],"facilitate":[46],"retrieval.":[47],"However,":[48],"this":[49,66],"approach":[51],"still":[52],"cannot":[53],"well":[54],"bridge":[55],"heterogeneity":[57],"gap":[58],"modalities.":[64],"In":[65],"paper,":[67],"we":[68,92,130],"propose":[69],"new":[71],"paradigm":[72,175],"converts":[74],"into":[77,101],"single-modality":[79],"problem.":[81],"Unlike":[82],"previous":[83],"works":[84],"rely":[86],"on":[87],"character":[88],"recognition":[89],"or":[90,202],"embedding,":[91],"directly":[93],"leverage":[94],"pictorial":[95],"information":[96],"by":[97],"rendering":[98],"learn":[104],"glyph":[106,153],"feature":[107,137,154],"each":[109],"character,":[110],"which":[111],"can":[112,190],"be":[113,192],"utilized":[114],"capture":[116],"images.":[124],"With":[125],"extracted":[127],"visual":[128,160],"features,":[129],"devise":[131],"synthetic":[133],"label":[134],"guided":[136],"alignment":[138],"mechanism":[139],"is":[141],"robust":[142],"different":[144],"styles":[147],"layouts.":[149],"modules":[151],"learning,":[155],"instance":[157],"detection,":[158],"matching":[161],"jointly":[163],"trained":[164],"end-to-end":[167],"framework.":[168],"Experimental":[169],"results":[170],"show":[171],"our":[173,188],"proposed":[174],"achieves":[176],"best":[178],"performance":[179],"multiple":[181],"benchmark":[182],"datasets.":[183],"As":[184],"side":[186],"product,":[187],"also":[191],"easily":[193],"generalized":[194],"support":[196],"queries":[198],"with":[199],"unseen":[200],"characters":[201],"languages":[203],"zero-shot":[206],"manner.":[207]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
