{"id":"https://openalex.org/W4399423427","doi":"https://doi.org/10.1145/3652583.3658075","title":"TWIST: Text-only Weakly Supervised Scene Text Spotting Using Pseudo Labels","display_name":"TWIST: Text-only Weakly Supervised Scene Text Spotting Using Pseudo Labels","publication_year":2024,"publication_date":"2024-05-30","ids":{"openalex":"https://openalex.org/W4399423427","doi":"https://doi.org/10.1145/3652583.3658075"},"language":"en","primary_location":{"id":"doi:10.1145/3652583.3658075","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3652583.3658075","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3658075","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3658075","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056159906","display_name":"Lilong Wen","orcid":"https://orcid.org/0000-0001-9378-5932"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lilong Wen","raw_affiliation_strings":["The State Key Laboratory of Blockchain and Data Security, Zhejiang University &amp; Hangzhou High-Tech Zone (Binjiang) Institute of Blockchain and Data Security, Zhejiang University, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0001-9378-5932","affiliations":[{"raw_affiliation_string":"The State Key Laboratory of Blockchain and Data Security, Zhejiang University &amp; Hangzhou High-Tech Zone (Binjiang) Institute of Blockchain and Data Security, Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072228670","display_name":"Xiu Tang","orcid":"https://orcid.org/0000-0001-8611-0283"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiu Tang","raw_affiliation_strings":["School of Software Technology, Zhejiang University &amp; Hangzhou High-Tech Zone (Binjiang) Institute of Blockchain and Data Security, Zhejiang University, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0001-8611-0283","affiliations":[{"raw_affiliation_string":"School of Software Technology, Zhejiang University &amp; Hangzhou High-Tech Zone (Binjiang) Institute of Blockchain and Data Security, Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011200911","display_name":"Dongxiang Zhang","orcid":"https://orcid.org/0000-0002-9964-2470"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongxiang Zhang","raw_affiliation_strings":["The State Key Laboratory of Blockchain and Data Security, Zhejiang University &amp; Hangzhou High-Tech Zone (Binjiang) Institute of Blockchain and Data Security, Zhejiang University, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-9964-2470","affiliations":[{"raw_affiliation_string":"The State Key Laboratory of Blockchain and Data Security, Zhejiang University &amp; Hangzhou High-Tech Zone (Binjiang) Institute of Blockchain and Data Security, Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5056159906"],"corresponding_institution_ids":["https://openalex.org/I76130692"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.07011719,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"275","last_page":"284"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9886000156402588,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spotting","display_name":"Spotting","score":0.8877153396606445},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7919450998306274},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.715665876865387},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6519432663917542},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.610905110836029},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5714184045791626},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4756595492362976},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3963111639022827}],"concepts":[{"id":"https://openalex.org/C2779506182","wikidata":"https://www.wikidata.org/wiki/Q7580141","display_name":"Spotting","level":2,"score":0.8877153396606445},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7919450998306274},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.715665876865387},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6519432663917542},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.610905110836029},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5714184045791626},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4756595492362976},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3963111639022827},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3652583.3658075","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3652583.3658075","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3658075","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3652583.3658075","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3652583.3658075","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3658075","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4399423427.pdf","grobid_xml":"https://content.openalex.org/works/W4399423427.grobid-xml"},"referenced_works_count":46,"referenced_works":["https://openalex.org/W2065217551","https://openalex.org/W2103804906","https://openalex.org/W2295107390","https://openalex.org/W2343052201","https://openalex.org/W2727586675","https://openalex.org/W2750938222","https://openalex.org/W2787131894","https://openalex.org/W2810028092","https://openalex.org/W2873558679","https://openalex.org/W2914492226","https://openalex.org/W2963299604","https://openalex.org/W2963351448","https://openalex.org/W2983626510","https://openalex.org/W2987563462","https://openalex.org/W2998621280","https://openalex.org/W3002942143","https://openalex.org/W3003241074","https://openalex.org/W3034792612","https://openalex.org/W3034907434","https://openalex.org/W3089933987","https://openalex.org/W3106271744","https://openalex.org/W3111172959","https://openalex.org/W3134441556","https://openalex.org/W3157738407","https://openalex.org/W3159307593","https://openalex.org/W3160053372","https://openalex.org/W3161679556","https://openalex.org/W3181016597","https://openalex.org/W3184364189","https://openalex.org/W3196976036","https://openalex.org/W3209366795","https://openalex.org/W4214922754","https://openalex.org/W4292264029","https://openalex.org/W4304080540","https://openalex.org/W4304091583","https://openalex.org/W4308544918","https://openalex.org/W4312230431","https://openalex.org/W4312351507","https://openalex.org/W4312593844","https://openalex.org/W4321485374","https://openalex.org/W4362555417","https://openalex.org/W6600009415","https://openalex.org/W6601010567","https://openalex.org/W6629282964","https://openalex.org/W6688974507","https://openalex.org/W6699519708"],"related_works":["https://openalex.org/W2034439647","https://openalex.org/W3184921334","https://openalex.org/W4249589822","https://openalex.org/W2103063669","https://openalex.org/W4380551034","https://openalex.org/W4386895402","https://openalex.org/W3202382261","https://openalex.org/W4255446307","https://openalex.org/W2028814537","https://openalex.org/W2296205523"],"abstract_inverted_index":{"Scene":[0],"text":[1,83,152,194],"spotting":[2,148,229],"plays":[3],"a":[4,11,16,112,208],"pivotal":[5],"role":[6],"in":[7,241],"image":[8],"understanding.However,":[9],"building":[10],"robust":[12],"model":[13],"for":[14,66,111,191],"such":[15,81],"task":[17],"necessitates":[18],"substantial":[19],"annotated":[20],"data.Various":[21],"efforts":[22],"have":[23],"been":[24],"made":[25],"to":[26,87,116,142,165,222],"reduce":[27],"the":[28,39,60,94,108,119,147,158,163,167,180,189,200,213,225,228],"burden":[29],"of":[30,96,146,203,227],"extensive":[31],"data":[32],"labeling.In":[33],"this":[34,50],"paper,":[35],"we":[36,133],"focus":[37,117],"on":[38,47,79,118],"minimum":[40],"labor":[41],"cost":[42],"approach":[43,232],"that":[44,138,150,177],"solely":[45],"relies":[46],"text-only":[48,250],"annotations.Under":[49],"weakly":[51],"supervised":[52],"paradigm,":[53],"existing":[54],"methods":[55,70],"encounter":[56],"intrinsic":[57],"difficulties":[58],"since":[59],"location":[61,125,155],"information":[62],"is":[63,196],"not":[64,102],"available":[65],"training.To":[67],"compensate,":[68],"these":[69,131],"often":[71,123],"employ":[72],"attention":[73,109,172],"maps":[74,173],"generated":[75,197,214],"from":[76],"models":[77],"pre-trained":[78],"tasks":[80],"as":[82,186],"recognition":[84,153,247],"or":[85],"classification":[86],"predict":[88],"spatial":[89],"information.This":[90],"approach,":[91],"however,":[92],"impedes":[93],"possibility":[95],"comprehensive":[97],"end-to-end":[98,144,234,246],"training":[99,145,235],"and":[100,154,174,236,245],"does":[101],"ensure":[103],"optimal":[104],"performance":[105],"outcomes.What's":[106],"more,":[107],"map":[110],"single":[113],"word":[114],"tends":[115],"distinguishing":[120],"areas,":[121],"which":[122],"yields":[124],"predictions":[126],"with":[127,170,216],"suboptimal":[128],"boundaries.To":[129],"overcome":[130],"limitations,":[132],"introduce":[134],"an":[135],"innovative":[136],"methodology":[137],"integrates":[139],"pseudo-label":[140,190],"generation":[141],"enable":[143],"network":[149],"optimizes":[151],"estimation":[156],"at":[157],"same":[159],"time":[160],"called":[161],"TWIST.During":[162],"training,":[164],"address":[166],"problem":[168],"associated":[169],"incomplete":[171],"obtain":[175],"pseudo-labels":[176,215],"can":[178],"cover":[179],"whole":[181],"word,":[182],"TWIST":[183],"treats":[184],"characters":[185],"elemental":[187],"units.So":[188],"each":[192],"given":[193],"instance":[195],"by":[198],"aggregating":[199],"inferred":[201],"locations":[202],"their":[204],"constituent":[205],"characters,":[206],"through":[207],"masked":[209],"character":[210],"prediction":[211],"task.Then":[212],"corresponding":[217],"textual":[218],"content":[219],"are":[220],"used":[221],"further":[223],"optimize":[224],"parameters":[226],"network.This":[230],"integrated":[231],"facilitates":[233],"achieves":[237],"new":[238],"state-of-the-art":[239],"results":[240],"several":[242],"public":[243],"detection":[244],"benchmarks":[248],"under":[249],"supervision.":[251]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
