{"id":"https://openalex.org/W4386075667","doi":"https://doi.org/10.1109/cvpr52729.2023.01854","title":"DeepSolo: Let Transformer Decoder with Explicit Points Solo for Text Spotting","display_name":"DeepSolo: Let Transformer Decoder with Explicit Points Solo for Text Spotting","publication_year":2023,"publication_date":"2023-06-01","ids":{"openalex":"https://openalex.org/W4386075667","doi":"https://doi.org/10.1109/cvpr52729.2023.01854"},"language":"en","primary_location":{"id":"doi:10.1109/cvpr52729.2023.01854","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52729.2023.01854","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5070541608","display_name":"Maoyuan Ye","orcid":"https://orcid.org/0000-0002-4180-1096"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Maoyuan Ye","raw_affiliation_strings":["Wuhan University,China","Wuhan University, China"],"affiliations":[{"raw_affiliation_string":"Wuhan University,China","institution_ids":["https://openalex.org/I37461747"]},{"raw_affiliation_string":"Wuhan University, China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100345321","display_name":"Jing Zhang","orcid":"https://orcid.org/0000-0001-6595-7661"},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Jing Zhang","raw_affiliation_strings":["The University of Sydney,Australia","The University of Sydney, Australia"],"affiliations":[{"raw_affiliation_string":"The University of Sydney,Australia","institution_ids":["https://openalex.org/I129604602"]},{"raw_affiliation_string":"The University of Sydney, Australia","institution_ids":["https://openalex.org/I129604602"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114860317","display_name":"Shanshan Zhao","orcid":"https://orcid.org/0009-0009-5523-8407"},"institutions":[{"id":"https://openalex.org/I4210103986","display_name":"Jingdong (China)","ror":"https://ror.org/01dkjkq64","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210103986"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shanshan Zhao","raw_affiliation_strings":["JD Explore Academy,China","JD Explore Academy, China"],"affiliations":[{"raw_affiliation_string":"JD Explore Academy,China","institution_ids":["https://openalex.org/I4210103986"]},{"raw_affiliation_string":"JD Explore Academy, China","institution_ids":["https://openalex.org/I4210103986"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026501335","display_name":"Juhua Liu","orcid":"https://orcid.org/0000-0002-3907-8820"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Juhua Liu","raw_affiliation_strings":["Wuhan University,China","Wuhan University, China"],"affiliations":[{"raw_affiliation_string":"Wuhan University,China","institution_ids":["https://openalex.org/I37461747"]},{"raw_affiliation_string":"Wuhan University, China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065250332","display_name":"Tongliang Liu","orcid":"https://orcid.org/0000-0002-9640-6472"},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Tongliang Liu","raw_affiliation_strings":["The University of Sydney,Australia","The University of Sydney, Australia"],"affiliations":[{"raw_affiliation_string":"The University of Sydney,Australia","institution_ids":["https://openalex.org/I129604602"]},{"raw_affiliation_string":"The University of Sydney, Australia","institution_ids":["https://openalex.org/I129604602"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060042752","display_name":"Bo Du","orcid":"https://orcid.org/0000-0002-0059-8458"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Du","raw_affiliation_strings":["Wuhan University,China","Wuhan University, China"],"affiliations":[{"raw_affiliation_string":"Wuhan University,China","institution_ids":["https://openalex.org/I37461747"]},{"raw_affiliation_string":"Wuhan University, China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074103823","display_name":"Dacheng Tao","orcid":"https://orcid.org/0000-0001-7225-5449"},"institutions":[{"id":"https://openalex.org/I4210103986","display_name":"Jingdong (China)","ror":"https://ror.org/01dkjkq64","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210103986"]},{"id":"https://openalex.org/I129604602","display_name":"University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU","CN"],"is_corresponding":false,"raw_author_name":"Dacheng Tao","raw_affiliation_strings":["JD Explore Academy,China","JD Explore Academy, China","The University of Sydney, Australia"],"affiliations":[{"raw_affiliation_string":"JD Explore Academy,China","institution_ids":["https://openalex.org/I4210103986"]},{"raw_affiliation_string":"JD Explore Academy, China","institution_ids":["https://openalex.org/I4210103986"]},{"raw_affiliation_string":"The University of Sydney, Australia","institution_ids":["https://openalex.org/I129604602"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5070541608"],"corresponding_institution_ids":["https://openalex.org/I37461747"],"apc_list":null,"apc_paid":null,"fwci":11.9972,"has_fulltext":false,"cited_by_count":100,"citation_normalized_percentile":{"value":0.99128085,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"19348","last_page":"19357"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12707","display_name":"Vehicle License Plate Recognition","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8468477725982666},{"id":"https://openalex.org/keywords/spotting","display_name":"Spotting","score":0.8338863849639893},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.7043429017066956},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.5029794573783875},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4721558094024658},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.45207101106643677},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.41097742319107056},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.23031601309776306}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8468477725982666},{"id":"https://openalex.org/C2779506182","wikidata":"https://www.wikidata.org/wiki/Q7580141","display_name":"Spotting","level":2,"score":0.8338863849639893},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.7043429017066956},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.5029794573783875},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4721558094024658},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45207101106643677},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.41097742319107056},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.23031601309776306},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cvpr52729.2023.01854","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52729.2023.01854","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.6000000238418579}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":79,"referenced_works":["https://openalex.org/W2008806374","https://openalex.org/W2127141656","https://openalex.org/W2128409098","https://openalex.org/W2135231474","https://openalex.org/W2144554289","https://openalex.org/W2150839555","https://openalex.org/W2194775991","https://openalex.org/W2222512263","https://openalex.org/W2785383245","https://openalex.org/W2875814315","https://openalex.org/W2908510526","https://openalex.org/W2914492226","https://openalex.org/W2962986948","https://openalex.org/W2963150697","https://openalex.org/W2963351448","https://openalex.org/W2964018263","https://openalex.org/W2965463054","https://openalex.org/W2970910956","https://openalex.org/W2981969038","https://openalex.org/W2983626510","https://openalex.org/W2988098900","https://openalex.org/W2996956254","https://openalex.org/W2997371611","https://openalex.org/W3034792612","https://openalex.org/W3082397598","https://openalex.org/W3092462694","https://openalex.org/W3094502228","https://openalex.org/W3096609285","https://openalex.org/W3097932944","https://openalex.org/W3109340983","https://openalex.org/W3110398855","https://openalex.org/W3111172959","https://openalex.org/W3131500599","https://openalex.org/W3134064484","https://openalex.org/W3138516171","https://openalex.org/W3152831436","https://openalex.org/W3159307593","https://openalex.org/W3160694286","https://openalex.org/W3170874841","https://openalex.org/W3175227919","https://openalex.org/W3179897446","https://openalex.org/W3189348500","https://openalex.org/W3195421894","https://openalex.org/W3196976036","https://openalex.org/W3199245537","https://openalex.org/W3203616741","https://openalex.org/W3205597380","https://openalex.org/W4221146106","https://openalex.org/W4225683503","https://openalex.org/W4226013992","https://openalex.org/W4237918430","https://openalex.org/W4286982960","https://openalex.org/W4304091583","https://openalex.org/W4304092561","https://openalex.org/W4312230431","https://openalex.org/W4312349930","https://openalex.org/W4312351507","https://openalex.org/W4312807693","https://openalex.org/W4312880451","https://openalex.org/W4315705623","https://openalex.org/W4382464147","https://openalex.org/W4385245566","https://openalex.org/W6739901393","https://openalex.org/W6752731419","https://openalex.org/W6757817989","https://openalex.org/W6778485988","https://openalex.org/W6780676603","https://openalex.org/W6781106802","https://openalex.org/W6784094891","https://openalex.org/W6784333009","https://openalex.org/W6788135285","https://openalex.org/W6797235774","https://openalex.org/W6801512601","https://openalex.org/W6802467256","https://openalex.org/W6809665764","https://openalex.org/W6810835471","https://openalex.org/W6811230874","https://openalex.org/W6841543928","https://openalex.org/W6844523961"],"related_works":["https://openalex.org/W2034439647","https://openalex.org/W3184921334","https://openalex.org/W4249589822","https://openalex.org/W2103063669","https://openalex.org/W4380551034","https://openalex.org/W4386895402","https://openalex.org/W3202382261","https://openalex.org/W4255446307","https://openalex.org/W4362466842","https://openalex.org/W2988098900"],"abstract_inverted_index":{"End-to-end":[0],"text":[1,7,72,80,109,127],"spotting":[2],"aims":[3],"to":[4,118,142],"integrate":[5],"scene":[6],"detection":[8,73],"and":[9,48,74,90,111,124,165],"recognition":[10,75],"into":[11],"a":[12,24,58,64,100,139],"unified":[13],"framework.":[14],"Dealing":[15],"with":[16,67,93,176],"the":[17,20,35,42,46,84,103,119],"relationship":[18],"between":[19,45],"two":[21],"sub-tasks":[22,47],"plays":[23],"pivotal":[25],"role":[26],"in":[27,133],"designing":[28],"effective":[29],"spotters.":[30],"Although":[31],"Transformer-based":[32],"methods":[33,164],"eliminate":[34],"heuristic":[36],"postprocessing,":[37],"they":[38],"still":[39],"suffer":[40],"from":[41],"synergy":[43],"issue":[44],"low":[49],"training":[50,168],"efficiency.":[51,169],"In":[52,170],"this":[53],"paper,":[54],"we":[55,82,136],"present":[56],"DeepSolo,":[57],"simple":[59,130],"DETR-like":[60],"baseline":[61],"that":[62,159],"lets":[63],"single":[65,101],"Decoder":[66],"Explicit":[68],"Points":[69],"Solo":[70],"for":[71,78],"simultaneously.":[76],"Technically,":[77],"each":[79],"instance,":[81],"represent":[83],"character":[85],"sequence":[86],"as":[87],"ordered":[88],"points":[89],"model":[91],"them":[92],"learnable":[94],"explicit":[95],"point":[96,104],"queries.":[97],"After":[98],"passing":[99],"decoder,":[102],"queries":[105],"have":[106],"encoded":[107],"requisite":[108],"semantics":[110],"locations,":[112],"thus":[113,148],"can":[114],"be":[115],"further":[116],"decoded":[117],"center":[120],"line,":[121],"boundary,":[122],"script,":[123],"confidence":[125],"of":[126],"via":[128],"very":[129],"prediction":[131],"heads":[132],"parallel.":[134],"Besides,":[135],"also":[137,174],"introduce":[138],"text-matching":[140],"criterion":[141],"deliver":[143],"more":[144,150],"accurate":[145],"supervisory":[146],"signals,":[147],"enabling":[149],"efficient":[151],"training.":[152],"Quantitative":[153],"experiments":[154],"on":[155],"public":[156],"benchmarks":[157],"demonstrate":[158],"DeepSolo":[160,172],"outperforms":[161],"previous":[162],"state-of-the-art":[163],"achieves":[166],"better":[167],"addition,":[171],"is":[173,189],"compatible":[175],"line":[177],"annotations,":[178],"which":[179],"require":[180],"much":[181],"less":[182],"annotation":[183],"cost":[184],"than":[185],"polygons.":[186],"The":[187],"code":[188],"available":[190],"at":[191],"https://github.com/ViTAE-Transformer/DeepSolo.":[192]},"counts_by_year":[{"year":2026,"cited_by_count":6},{"year":2025,"cited_by_count":40},{"year":2024,"cited_by_count":48},{"year":2023,"cited_by_count":6}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
