{"id":"https://openalex.org/W4402352483","doi":"https://doi.org/10.1109/ijcnn60899.2024.10650315","title":"Text Spotting with a Unified Transformer Decoder","display_name":"Text Spotting with a Unified Transformer Decoder","publication_year":2024,"publication_date":"2024-06-30","ids":{"openalex":"https://openalex.org/W4402352483","doi":"https://doi.org/10.1109/ijcnn60899.2024.10650315"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn60899.2024.10650315","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/ijcnn60899.2024.10650315","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112120237","display_name":"Guan-Yi Zheng","orcid":"https://orcid.org/0009-0007-5958-5372"},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Guanyi Zheng","raw_affiliation_strings":["Xiamen University,School of Informatics,Xiamen,China"],"affiliations":[{"raw_affiliation_string":"Xiamen University,School of Informatics,Xiamen,China","institution_ids":["https://openalex.org/I191208505"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059463815","display_name":"Ming Qiu","orcid":"https://orcid.org/0000-0001-5190-7620"},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ming Qiu","raw_affiliation_strings":["Xiamen University,School of Informatics,Xiamen,China"],"affiliations":[{"raw_affiliation_string":"Xiamen University,School of Informatics,Xiamen,China","institution_ids":["https://openalex.org/I191208505"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100622644","display_name":"Xing Gao","orcid":"https://orcid.org/0000-0002-0401-5125"},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xing Gao","raw_affiliation_strings":["Xiamen University,School of Informatics,Xiamen,China"],"affiliations":[{"raw_affiliation_string":"Xiamen University,School of Informatics,Xiamen,China","institution_ids":["https://openalex.org/I191208505"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5112120237"],"corresponding_institution_ids":["https://openalex.org/I191208505"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.14602404,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"28","issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9646000266075134,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13523","display_name":"Mathematics, Computing, and Information Processing","score":0.963699996471405,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spotting","display_name":"Spotting","score":0.7783370018005371},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7255498766899109},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6114206910133362},{"id":"https://openalex.org/keywords/keyword-spotting","display_name":"Keyword spotting","score":0.6013569235801697},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.332586407661438},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2529231905937195},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.2122240662574768},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.142258882522583},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10333874821662903}],"concepts":[{"id":"https://openalex.org/C2779506182","wikidata":"https://www.wikidata.org/wiki/Q7580141","display_name":"Spotting","level":2,"score":0.7783370018005371},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7255498766899109},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6114206910133362},{"id":"https://openalex.org/C2781213101","wikidata":"https://www.wikidata.org/wiki/Q6398558","display_name":"Keyword spotting","level":2,"score":0.6013569235801697},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.332586407661438},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2529231905937195},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.2122240662574768},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.142258882522583},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10333874821662903}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn60899.2024.10650315","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/ijcnn60899.2024.10650315","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","score":0.46000000834465027,"display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W2122221966","https://openalex.org/W2127141656","https://openalex.org/W2150839555","https://openalex.org/W2194187530","https://openalex.org/W2194775991","https://openalex.org/W2550687635","https://openalex.org/W2559655401","https://openalex.org/W2593572697","https://openalex.org/W2784050770","https://openalex.org/W2911295582","https://openalex.org/W2962766617","https://openalex.org/W2963840241","https://openalex.org/W2964018263","https://openalex.org/W2967615747","https://openalex.org/W2970910956","https://openalex.org/W2981969038","https://openalex.org/W2983626510","https://openalex.org/W2988098900","https://openalex.org/W2996956254","https://openalex.org/W3005436539","https://openalex.org/W3034792612","https://openalex.org/W3097932944","https://openalex.org/W3102695566","https://openalex.org/W3110398855","https://openalex.org/W3111172959","https://openalex.org/W3152831436","https://openalex.org/W3159307593","https://openalex.org/W3186906052","https://openalex.org/W3196976036","https://openalex.org/W3203616741","https://openalex.org/W4225683503","https://openalex.org/W4283805255","https://openalex.org/W4292692470","https://openalex.org/W4296473552","https://openalex.org/W4304091583","https://openalex.org/W4304092561","https://openalex.org/W4312230431","https://openalex.org/W4312351507","https://openalex.org/W4312879041","https://openalex.org/W4312880451","https://openalex.org/W4386065412","https://openalex.org/W4386075667","https://openalex.org/W6618372016","https://openalex.org/W6757817989","https://openalex.org/W6784094891"],"related_works":["https://openalex.org/W2918559346","https://openalex.org/W2114097550","https://openalex.org/W3119978414","https://openalex.org/W4286904253","https://openalex.org/W2516975559","https://openalex.org/W2545741539","https://openalex.org/W3206647229","https://openalex.org/W1969408022","https://openalex.org/W2000885660","https://openalex.org/W2117995638"],"abstract_inverted_index":{"End-to-end":[0],"scene":[1,121],"text":[2,122],"spotting":[3],"has":[4],"received":[5],"increasing":[6],"research":[7],"attention":[8],"in":[9,17],"recent":[10],"years,":[11],"which":[12,48],"combines":[13],"detection":[14],"and":[15,27,63,73,89,125,130],"recognition":[16],"a":[18,43,108],"single":[19],"framework.":[20],"However,":[21],"most":[22],"existing":[23],"pipelines":[24],"are":[25,83],"complicated":[26],"underutilized":[28],"by":[29],"the":[30,33,51,60,70,75,87,93,113,135],"synergy":[31,72],"between":[32],"sub-tasks.":[34,56],"In":[35],"this":[36],"paper,":[37],"we":[38,58],"present":[39],"Text":[40],"Spotting":[41],"with":[42],"Unified":[44],"Transformer":[45],"Decoder":[46],"(TSUTD),":[47],"directly":[49],"exploits":[50],"unified":[52],"features":[53],"for":[54,95],"both":[55],"Technically,":[57],"unify":[59],"backbone,":[61],"encoder,":[62],"decoder":[64],"to":[65,85,111],"take":[66],"full":[67],"advantage":[68],"of":[69,134],"intrinsic":[71],"excavate":[74],"common":[76],"features.":[77],"Only":[78],"two":[79],"simple":[80],"prediction":[81],"heads":[82],"utilized":[84],"obtain":[86],"location":[88],"transcriptions":[90],"directly,":[91],"eliminating":[92],"need":[94],"heuristics-driven":[96],"post-processing":[97],"procedures":[98],"or":[99],"any":[100],"other":[101],"redundant":[102],"predictions.":[103],"Furthermore,":[104],"our":[105],"method":[106],"introduces":[107],"Visual":[109],"prompter":[110],"instruct":[112],"subsequent":[114],"process.":[115],"Quantitative":[116],"experiments":[117],"on":[118],"arbitrarily":[119],"shaped":[120],"datasets":[123],"Total-Text":[124],"SCUT-CTW1500":[126],"demonstrate":[127],"competitive":[128],"performances":[129],"significant":[131],"efficiency":[132],"advantages":[133],"proposed":[136],"method.":[137]},"counts_by_year":[],"updated_date":"2025-12-21T01:58:51.020947","created_date":"2025-10-10T00:00:00"}
