{"id":"https://openalex.org/W4402216502","doi":"https://doi.org/10.1109/tcsvt.2024.3454331","title":"A Bilingual, Open World Video Text Dataset and Real-Time Video Text Spotting With Contrastive Learning","display_name":"A Bilingual, Open World Video Text Dataset and Real-Time Video Text Spotting With Contrastive Learning","publication_year":2024,"publication_date":"2024-09-04","ids":{"openalex":"https://openalex.org/W4402216502","doi":"https://doi.org/10.1109/tcsvt.2024.3454331"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2024.3454331","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3454331","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101634167","display_name":"Weijia Wu","orcid":"https://orcid.org/0000-0003-3912-7212"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Weijia Wu","raw_affiliation_strings":["College of Biomedical Engineering and Instrument Science, Zhejiang University, Hangzhou, China","Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"College of Biomedical Engineering and Instrument Science, Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]},{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115076694","display_name":"Zhuang Li","orcid":"https://orcid.org/0009-0006-3870-4275"},"institutions":[{"id":"https://openalex.org/I4401726859","display_name":"Kuaishou (China)","ror":"https://ror.org/0258as409","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726859"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhuang Li","raw_affiliation_strings":["Kuaishou Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Kuaishou Technology, Beijing, China","institution_ids":["https://openalex.org/I4401726859"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100760616","display_name":"Yuanqiang Cai","orcid":"https://orcid.org/0000-0001-8041-012X"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuanqiang Cai","raw_affiliation_strings":["School of Computer Science, Beijing University of Posts and Telecommunications, Beijing, China","Beijing University of Posts and Telecommunication, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]},{"raw_affiliation_string":"Beijing University of Posts and Telecommunication, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043169810","display_name":"Hong Zhou","orcid":"https://orcid.org/0000-0003-1314-8883"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hong Zhou","raw_affiliation_strings":["College of Biomedical Engineering and Instrument Science, Zhejiang University, Hangzhou, China","Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"College of Biomedical Engineering and Instrument Science, Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]},{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5068937750","display_name":"Mike Zheng Shou","orcid":"https://orcid.org/0000-0002-7681-2166"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Mike Zheng Shou","raw_affiliation_strings":["Department of Electrical &#x0026; Computer Engineering, National University of Singapore, Queenstown, Singapore","National University of Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Department of Electrical &#x0026; Computer Engineering, National University of Singapore, Queenstown, Singapore","institution_ids":["https://openalex.org/I165932596"]},{"raw_affiliation_string":"National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101634167"],"corresponding_institution_ids":["https://openalex.org/I76130692"],"apc_list":null,"apc_paid":null,"fwci":1.3901,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.84378605,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"35","issue":"1","first_page":"534","last_page":"546"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9544000029563904,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9544000029563904,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9526000022888184,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9473000168800354,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8060485124588013},{"id":"https://openalex.org/keywords/spotting","display_name":"Spotting","score":0.7508167028427124},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6118531823158264},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5869379043579102},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.33710891008377075}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8060485124588013},{"id":"https://openalex.org/C2779506182","wikidata":"https://www.wikidata.org/wiki/Q7580141","display_name":"Spotting","level":2,"score":0.7508167028427124},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6118531823158264},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5869379043579102},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.33710891008377075}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2024.3454331","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3454331","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.7200000286102295,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G7283678196","display_name":null,"funder_award_id":"2022YFC3602601","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":78,"referenced_works":["https://openalex.org/W1569095176","https://openalex.org/W1964797636","https://openalex.org/W1995052126","https://openalex.org/W1998981432","https://openalex.org/W2008806374","https://openalex.org/W2012689760","https://openalex.org/W2013237800","https://openalex.org/W2027883219","https://openalex.org/W2044181395","https://openalex.org/W2083442372","https://openalex.org/W2083954025","https://openalex.org/W2114449851","https://openalex.org/W2135449683","https://openalex.org/W2137969867","https://openalex.org/W2138621090","https://openalex.org/W2144554289","https://openalex.org/W2194187530","https://openalex.org/W2340583188","https://openalex.org/W2343052201","https://openalex.org/W2605982830","https://openalex.org/W2606911419","https://openalex.org/W2785383245","https://openalex.org/W2786502936","https://openalex.org/W2875814315","https://openalex.org/W2962986948","https://openalex.org/W2963299604","https://openalex.org/W2963517393","https://openalex.org/W2963647456","https://openalex.org/W2963677766","https://openalex.org/W2964018263","https://openalex.org/W2964296749","https://openalex.org/W2982121298","https://openalex.org/W2998621280","https://openalex.org/W3034447740","https://openalex.org/W3034792612","https://openalex.org/W3034907434","https://openalex.org/W3035449864","https://openalex.org/W3035524453","https://openalex.org/W3082397598","https://openalex.org/W3089933987","https://openalex.org/W3095753995","https://openalex.org/W3104862079","https://openalex.org/W3106879600","https://openalex.org/W3106963860","https://openalex.org/W3128401049","https://openalex.org/W3130796238","https://openalex.org/W3158717538","https://openalex.org/W3159307593","https://openalex.org/W3165405144","https://openalex.org/W3174658120","https://openalex.org/W3179426054","https://openalex.org/W3198649044","https://openalex.org/W3208722310","https://openalex.org/W3208740751","https://openalex.org/W4287588076","https://openalex.org/W4312359729","https://openalex.org/W4385990975","https://openalex.org/W4386159002","https://openalex.org/W4386160492","https://openalex.org/W4389705007","https://openalex.org/W4390872876","https://openalex.org/W4390873537","https://openalex.org/W4401172514","https://openalex.org/W6618372016","https://openalex.org/W6620707391","https://openalex.org/W6637484576","https://openalex.org/W6677326919","https://openalex.org/W6691603626","https://openalex.org/W6744310024","https://openalex.org/W6761159557","https://openalex.org/W6767211374","https://openalex.org/W6776110894","https://openalex.org/W6799133303","https://openalex.org/W6805458221","https://openalex.org/W6810323488","https://openalex.org/W6811022444","https://openalex.org/W6855239411","https://openalex.org/W6858684622"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2034439647","https://openalex.org/W3184921334","https://openalex.org/W4249589822","https://openalex.org/W2058491579","https://openalex.org/W118429992","https://openalex.org/W1483316057","https://openalex.org/W4300154353","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Most":[0],"existing":[1,53],"video":[2,120,188],"text":[3,28,58,81,105,121,148,189],"spotting":[4,122,190],"benchmarks":[5],"focus":[6],"on":[7,204],"evaluating":[8],"a":[9,22,117,138,153],"single":[10],"language":[11],"and":[12,112,128,165,173,208,218,227],"scenario":[13],"with":[14,43,56,123,171,206],"limited":[15],"data.":[16],"In":[17],"this":[18],"work,":[19],"we":[20,39,115],"introduce":[21],"large-scale,":[23],"Bilingual,":[24],"Open":[25],"World":[26],"Video":[27],"benchmark":[29],"dataset":[30,55,63,217],"(BOVText).":[31],"There":[32],"are":[33,90],"four":[34],"features":[35],"for":[36,92],"BOVText.":[37],"Firstly,":[38],"provide":[40],"2,021":[41],"videos":[42],"more":[44],"than":[45,51],"1,750,000":[46],"frames,":[47],"25":[48],"times":[49],"larger":[50],"the":[52,93,98,101,144,179,212],"largest":[54],"incidental":[57],"in":[59,97,152],"videos.":[60],"Secondly,":[61],"our":[62,182],"covers":[64],"32":[65],"open":[66],"scenarios,":[67,71],"including":[68],"many":[69],"virtual":[70],"e.g.,":[72],"Life":[73],"Vlog,":[74],"Driving,":[75],"Movie,":[76],"Game,":[77],"etc.":[78],"Thirdly,":[79],"abundant":[80],"types":[82],"annotation":[83,106],"(i.e.,":[84],"title,":[85],"caption":[86],"or":[87],"scene":[88],"text)":[89],"provided":[91],"different":[94],"representational":[95],"meanings":[96],"video.":[99],"Fourthly,":[100],"BOVText":[102],"provides":[103],"bilingual":[104],"to":[107,168],"promote":[108],"multiple":[109],"cultures\u2019":[110],"lives":[111],"communication.":[113],"Besides,":[114],"propose":[116],"real-time":[118,154],"end-to-end":[119,155],"Contrastive":[124],"Learning":[125],"of":[126,181,199,220],"Semantic":[127],"Visual":[129],"Representation":[130],"(CoText),":[131],"which":[132],"includes":[133],"two":[134],"advantages:":[135],"1)":[136],"With":[137],"lightweight":[139],"architecture,":[140],"CoText":[141,159,185,221],"simultaneously":[142],"addresses":[143],"three":[145],"tasks":[146],"(e.g.,":[147],"detection,":[149],"tracking,":[150],"recognition)":[151],"trainable":[156],"framework.":[157],"2)":[158],"tracks":[160],"texts":[161],"by":[162],"comprehending":[163],"them":[164,167],"relating":[166],"each":[169],"other":[170],"visual":[172],"semantic":[174],"representations.":[175],"Extensive":[176],"experiments":[177],"show":[178],"superiority":[180],"method.":[183,215],"Especially,":[184],"achieves":[186],"an":[187],"<inline-formula":[191],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[192],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">":[193],"<tex-math":[194],"notation=\"LaTeX\">$\\mathrm":[195],"{":[196],"ID_{F1}}$":[197],"</tex-math></inline-formula>":[198],"71.7%":[200],"at":[201],"32.3":[202],"FPS":[203,210],"ICDAR2015video,":[205],"10.2%":[207],"23.3":[209],"improvement":[211],"previous":[213],"best":[214],"The":[216],"code":[219],"can":[222],"be":[223],"found":[224],"at:":[225],"Dataset":[226],"CoText,":[228],"respectively.":[229]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
