{"id":"https://openalex.org/W4304092561","doi":"https://doi.org/10.1145/3503161.3548266","title":"Decoupling Recognition from Detection: Single Shot Self-Reliant Scene Text Spotter","display_name":"Decoupling Recognition from Detection: Single Shot Self-Reliant Scene Text Spotter","publication_year":2022,"publication_date":"2022-10-10","ids":{"openalex":"https://openalex.org/W4304092561","doi":"https://doi.org/10.1145/3503161.3548266"},"language":"en","primary_location":{"id":"doi:10.1145/3503161.3548266","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3503161.3548266","pdf_url":null,"source":{"id":"https://openalex.org/S4363608757","display_name":"Proceedings of the 30th ACM International Conference on Multimedia","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5014956557","display_name":"Jingjing Wu","orcid":"https://orcid.org/0000-0002-3818-4277"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jingjing Wu","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027156714","display_name":"Pengyuan Lyu","orcid":"https://orcid.org/0000-0003-3153-8519"},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pengyuan Lyu","raw_affiliation_strings":["Baidu Inc., Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Baidu Inc., Shenzhen, China","institution_ids":["https://openalex.org/I98301712"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112373191","display_name":"Guangming Lu","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guangming Lu","raw_affiliation_strings":["Guangdong Provincial Key Laboratory of Novel Security Intelligence Technologies &amp; Harbin Institute of Technology, Shenzhen, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Guangdong Provincial Key Laboratory of Novel Security Intelligence Technologies &amp; Harbin Institute of Technology, Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056247902","display_name":"Chengquan Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chengquan Zhang","raw_affiliation_strings":["Baidu Inc., Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Baidu Inc., Shenzhen, China","institution_ids":["https://openalex.org/I98301712"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051264771","display_name":"Kun Yao","orcid":"https://orcid.org/0000-0001-7155-4076"},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kun Yao","raw_affiliation_strings":["Baidu Inc., Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Baidu Inc., Shenzhen, China","institution_ids":["https://openalex.org/I98301712"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5078487642","display_name":"Wenjie Pei","orcid":"https://orcid.org/0000-0001-8117-2696"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenjie Pei","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5014956557"],"corresponding_institution_ids":["https://openalex.org/I204983213"],"apc_list":null,"apc_paid":null,"fwci":1.0195,"has_fulltext":false,"cited_by_count":17,"citation_normalized_percentile":{"value":0.84186511,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1319","last_page":"1328"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.9847000241279602,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9843000173568726,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7839953899383545},{"id":"https://openalex.org/keywords/spotting","display_name":"Spotting","score":0.6738232970237732},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6324777007102966},{"id":"https://openalex.org/keywords/text-detection","display_name":"Text detection","score":0.613975465297699},{"id":"https://openalex.org/keywords/text-recognition","display_name":"Text recognition","score":0.5885977149009705},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5439912676811218},{"id":"https://openalex.org/keywords/pooling","display_name":"Pooling","score":0.5246684551239014},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.4639667272567749},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4429386258125305},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4420674443244934},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3939087986946106},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.32976534962654114},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.1790345311164856}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7839953899383545},{"id":"https://openalex.org/C2779506182","wikidata":"https://www.wikidata.org/wiki/Q7580141","display_name":"Spotting","level":2,"score":0.6738232970237732},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6324777007102966},{"id":"https://openalex.org/C2983589003","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Text detection","level":3,"score":0.613975465297699},{"id":"https://openalex.org/C2983812711","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Text recognition","level":3,"score":0.5885977149009705},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5439912676811218},{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.5246684551239014},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.4639667272567749},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4429386258125305},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4420674443244934},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3939087986946106},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.32976534962654114},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.1790345311164856},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3503161.3548266","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3503161.3548266","pdf_url":null,"source":{"id":"https://openalex.org/S4363608757","display_name":"Proceedings of the 30th ACM International Conference on Multimedia","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1922126009","https://openalex.org/W2127141656","https://openalex.org/W2128409098","https://openalex.org/W2194187530","https://openalex.org/W2343052201","https://openalex.org/W2412782625","https://openalex.org/W2504335775","https://openalex.org/W2550687635","https://openalex.org/W2605982830","https://openalex.org/W2777652944","https://openalex.org/W2784050770","https://openalex.org/W2785383245","https://openalex.org/W2810983211","https://openalex.org/W2902494497","https://openalex.org/W2963299604","https://openalex.org/W2963840241","https://openalex.org/W2964296749","https://openalex.org/W2970910956","https://openalex.org/W2983626510","https://openalex.org/W2996956254","https://openalex.org/W2997371611","https://openalex.org/W3003921261","https://openalex.org/W3027134841","https://openalex.org/W3097932944","https://openalex.org/W3102695566","https://openalex.org/W3106250896","https://openalex.org/W3110398855","https://openalex.org/W3152831436","https://openalex.org/W3159307593","https://openalex.org/W3179426054","https://openalex.org/W6681452975"],"related_works":["https://openalex.org/W2988098900","https://openalex.org/W4225683503","https://openalex.org/W2949229884","https://openalex.org/W2875814315","https://openalex.org/W4323366758","https://openalex.org/W2982121298","https://openalex.org/W4226291595","https://openalex.org/W3205597380","https://openalex.org/W3005922567","https://openalex.org/W2996956254"],"abstract_inverted_index":{"Typical":[0],"text":[1,14,20,25,43,51,114,137,144,158],"spotters":[2,178],"follow":[3],"the":[4,9,23,48,55,69,94,124,136,142,154],"two-stage":[5],"spotting":[6],"strategy:":[7],"detect":[8],"precise":[10,143],"boundary":[11],"for":[12,157],"a":[13],"instance":[15],"first":[16],"and":[17,71,77,116,120,166,184],"then":[18],"perform":[19],"recognition":[21,44,72,108,117],"within":[22],"located":[24],"region.":[26],"While":[27],"such":[28],"strategy":[29],"has":[30],"achieved":[31],"substantial":[32],"progress,":[33],"there":[34],"are":[35,146],"two":[36],"underlying":[37],"limitations.":[38],"1)":[39],"The":[40,64],"performance":[41],"of":[42,50,181],"depends":[45],"heavily":[46],"on":[47,163],"precision":[49],"detection,":[52],"resulting":[53],"in":[54,118,179],"potential":[56],"error":[57],"propagation":[58],"from":[59,75,86,109],"detection":[60,70,115,159],"to":[61,79,134,148,175],"recognition.":[62],"2)":[63],"RoI":[65],"cropping":[66],"which":[67,102],"bridges":[68],"brings":[73],"noise":[74],"background":[76],"leads":[78],"information":[80],"loss":[81],"when":[82],"pooling":[83],"or":[84],"interpolating":[85],"feature":[87],"maps.":[88],"In":[89],"this":[90],"work":[91],"we":[92,112],"propose":[93],"single":[95],"shot":[96],"Self-Reliant":[97],"Scene":[98],"Text":[99],"Spotter":[100],"(SRSTS),":[101],"circumvents":[103],"these":[104],"limitations":[105],"by":[106,123],"decoupling":[107],"detection.":[110],"Specifically,":[111],"conduct":[113],"parallel":[119],"bridge":[121],"them":[122],"shared":[125],"positive":[126],"anchor":[127],"point.":[128],"Consequently,":[129],"our":[130,151,171],"method":[131,152],"is":[132],"able":[133],"recognize":[135],"instances":[138],"correctly":[139],"even":[140],"though":[141],"boundaries":[145],"challenging":[147],"detect.":[149],"Additionally,":[150],"reduces":[153],"annotation":[155],"cost":[156],"substantially.":[160],"Extensive":[161],"experiments":[162],"regular-shaped":[164],"benchmark":[165,168],"arbitrary-shaped":[167],"demonstrate":[169],"that":[170],"SRSTS":[172],"compares":[173],"favorably":[174],"previous":[176],"state-of-the-art":[177],"terms":[180],"both":[182],"accuracy":[183],"efficiency.":[185]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":7}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
