{"id":"https://openalex.org/W4319777850","doi":"https://doi.org/10.1109/tpami.2022.3230962","title":"Image-to-Character-to-Word Transformers for Accurate Scene Text Recognition","display_name":"Image-to-Character-to-Word Transformers for Accurate Scene Text Recognition","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4319777850","doi":"https://doi.org/10.1109/tpami.2022.3230962","pmid":"https://pubmed.ncbi.nlm.nih.gov/37022831"},"language":"en","primary_location":{"id":"doi:10.1109/tpami.2022.3230962","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2022.3230962","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068548252","display_name":"Chuhui Xue","orcid":"https://orcid.org/0000-0002-3562-3094"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Chuhui Xue","raw_affiliation_strings":["School of Computer Science and Engineering, Nanyang Technological University, Singapore"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067989804","display_name":"Jiaxing Huang","orcid":"https://orcid.org/0000-0002-8681-0471"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Jiaxing Huang","raw_affiliation_strings":["School of Computer Science and Engineering, Nanyang Technological University, Singapore"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100340238","display_name":"Wenqing Zhang","orcid":"https://orcid.org/0000-0003-3479-282X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wenqing Zhang","raw_affiliation_strings":["Bytedance Inc., China"],"affiliations":[{"raw_affiliation_string":"Bytedance Inc., China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023507910","display_name":"Shijian Lu","orcid":"https://orcid.org/0000-0002-6766-2506"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Shijian Lu","raw_affiliation_strings":["School of Computer Science and Engineering, Nanyang Technological University, Singapore"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055085354","display_name":"Changhu Wang","orcid":"https://orcid.org/0000-0001-8373-2597"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Changhu Wang","raw_affiliation_strings":["Bytedance Inc., China"],"affiliations":[{"raw_affiliation_string":"Bytedance Inc., China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101562162","display_name":"Song Bai","orcid":"https://orcid.org/0000-0002-2570-9118"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Song Bai","raw_affiliation_strings":["Bytedance Inc., China"],"affiliations":[{"raw_affiliation_string":"Bytedance Inc., China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5068548252"],"corresponding_institution_ids":["https://openalex.org/I172675005"],"apc_list":null,"apc_paid":null,"fwci":2.7014,"has_fulltext":false,"cited_by_count":23,"citation_normalized_percentile":{"value":0.91814402,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"45","issue":"11","first_page":"1","last_page":"14"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.9898999929428101,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9825000166893005,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8269069194793701},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7452747225761414},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.6130530834197998},{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.5235592722892761},{"id":"https://openalex.org/keywords/optical-character-recognition","display_name":"Optical character recognition","score":0.4940965473651886},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4516125023365021},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.41845160722732544},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3732331097126007},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.36458268761634827},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.36074239015579224},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.3286735713481903}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8269069194793701},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7452747225761414},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.6130530834197998},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.5235592722892761},{"id":"https://openalex.org/C546480517","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Optical character recognition","level":3,"score":0.4940965473651886},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4516125023365021},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.41845160722732544},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3732331097126007},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.36458268761634827},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.36074239015579224},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3286735713481903},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/tpami.2022.3230962","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2022.3230962","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},{"id":"pmid:37022831","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/37022831","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on pattern analysis and machine intelligence","raw_type":null},{"id":"pmh:oai:dr.ntu.edu.sg:10356/172173","is_oa":false,"landing_page_url":"https://hdl.handle.net/10356/172173","pdf_url":null,"source":{"id":"https://openalex.org/S4306402609","display_name":"DR-NTU (Nanyang Technological University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I172675005","host_organization_name":"Nanyang Technological University","host_organization_lineage":["https://openalex.org/I172675005"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Journal Article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.6100000143051147,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":90,"referenced_works":["https://openalex.org/W603908379","https://openalex.org/W1491389626","https://openalex.org/W1922126009","https://openalex.org/W1971822075","https://openalex.org/W1998042868","https://openalex.org/W2008806374","https://openalex.org/W2049951199","https://openalex.org/W2110485445","https://openalex.org/W2127141656","https://openalex.org/W2128409098","https://openalex.org/W2133564696","https://openalex.org/W2144554289","https://openalex.org/W2146835493","https://openalex.org/W2153182373","https://openalex.org/W2194187530","https://openalex.org/W2294053032","https://openalex.org/W2343052201","https://openalex.org/W2593572697","https://openalex.org/W2740767790","https://openalex.org/W2751748110","https://openalex.org/W2754613063","https://openalex.org/W2788069964","https://openalex.org/W2788840914","https://openalex.org/W2795619303","https://openalex.org/W2810983211","https://openalex.org/W2873558679","https://openalex.org/W2875814315","https://openalex.org/W2895160027","https://openalex.org/W2896457183","https://openalex.org/W2904785373","https://openalex.org/W2914492226","https://openalex.org/W2952285877","https://openalex.org/W2959965583","https://openalex.org/W2962790387","https://openalex.org/W2963233387","https://openalex.org/W2963299604","https://openalex.org/W2963327605","https://openalex.org/W2963517393","https://openalex.org/W2963712589","https://openalex.org/W2965066169","https://openalex.org/W2970910956","https://openalex.org/W2979371747","https://openalex.org/W2981413347","https://openalex.org/W2997749585","https://openalex.org/W2998382406","https://openalex.org/W3004846386","https://openalex.org/W3005436539","https://openalex.org/W3013224334","https://openalex.org/W3015415468","https://openalex.org/W3034447740","https://openalex.org/W3035022492","https://openalex.org/W3035106683","https://openalex.org/W3035449864","https://openalex.org/W3042760913","https://openalex.org/W3043311956","https://openalex.org/W3092462694","https://openalex.org/W3094502228","https://openalex.org/W3096609285","https://openalex.org/W3103667010","https://openalex.org/W3107840264","https://openalex.org/W3110267192","https://openalex.org/W3121523901","https://openalex.org/W3131500599","https://openalex.org/W3138516171","https://openalex.org/W3165924482","https://openalex.org/W3171125843","https://openalex.org/W3175618949","https://openalex.org/W3181186176","https://openalex.org/W3199093552","https://openalex.org/W3199245537","https://openalex.org/W3201786596","https://openalex.org/W3202912918","https://openalex.org/W4385245566","https://openalex.org/W4386792828","https://openalex.org/W6618372016","https://openalex.org/W6629590909","https://openalex.org/W6679434410","https://openalex.org/W6739901393","https://openalex.org/W6744133147","https://openalex.org/W6744179516","https://openalex.org/W6755207826","https://openalex.org/W6763509872","https://openalex.org/W6764306230","https://openalex.org/W6779879114","https://openalex.org/W6784094891","https://openalex.org/W6784333009","https://openalex.org/W6788135285","https://openalex.org/W6790749177","https://openalex.org/W6796721132","https://openalex.org/W6801512601"],"related_works":["https://openalex.org/W4251972423","https://openalex.org/W1503216044","https://openalex.org/W2393609567","https://openalex.org/W2369369044","https://openalex.org/W2354143083","https://openalex.org/W2372906645","https://openalex.org/W4319998713","https://openalex.org/W2366269494","https://openalex.org/W1991513203","https://openalex.org/W3178467699"],"abstract_inverted_index":{"Leveraging":[0],"the":[1,55,140,163,177,181],"advances":[2],"of":[3,29,42,62,109,118,151],"natural":[4],"language":[5],"processing,":[6],"most":[7],"recent":[8],"scene":[9,35,76,90,134,188,207],"text":[10,17,36,77,91,135,165,189,208],"recognizers":[11],"adopt":[12],"an":[13,122],"encoder-decoder":[14],"architecture":[15],"where":[16],"images":[18,37,113],"are":[19],"first":[20,98],"converted":[21],"to":[22,59,83],"representative":[23],"features":[24,64,120],"and":[25,49,57,85,194],"then":[26],"a":[27,74,107],"sequence":[28],"characters":[30],"via":[31],"'sequential":[32],"decoding'.":[33],"However,":[34],"suffer":[38],"from":[39,112,139,147],"rich":[40],"noises":[41],"different":[43,116],"sources":[44],"such":[45],"as":[46],"complex":[47],"background":[48],"geometric":[50,84],"distortions":[51],"which":[52,105,132,161],"often":[53],"confuse":[54],"decoder":[56],"lead":[58],"incorrect":[60],"alignment":[61],"visual":[63,119],"at":[65],"noisy":[66,152],"decoding":[67,137],"time":[68],"steps.":[69],"This":[70],"paper":[71],"presents":[72],"I2C2W,":[73],"novel":[75],"recognition":[78,92,166,202],"technique":[79],"that":[80,176],"is":[81],"tolerant":[82],"photometric":[86],"degradation":[87],"by":[88,136,183],"decomposing":[89],"into":[93],"two":[94],"inter-connected":[95],"tasks.":[96],"The":[97,125,144],"task":[99,127],"focuses":[100],"on":[101,115],"image-to-character":[102],"(I2C)":[103],"mapping":[104,131],"detects":[106],"set":[108],"character":[110,142,148,158],"candidates":[111,159],"based":[114],"alignments":[117],"in":[121],"non-sequential":[123],"way.":[124],"second":[126],"tackles":[128],"character-to-word":[129],"(C2W)":[130],"recognizes":[133],"words":[138],"detected":[141,157],"candidates.":[143],"direct":[145],"learning":[146],"semantics":[149],"(instead":[150],"image":[153],"features)":[154],"corrects":[155],"falsely":[156],"effectively":[160],"improves":[162],"final":[164],"accuracy":[167],"greatly.":[168],"Extensive":[169],"experiments":[170],"over":[171,204],"nine":[172],"public":[173],"datasets":[174,190],"show":[175],"proposed":[178],"I2C2W":[179],"outperforms":[180],"state-of-the-art":[182],"large":[184],"margins":[185],"for":[186],"challenging":[187],"with":[191],"various":[192],"curvature":[193],"perspective":[195],"distortions.":[196],"It":[197],"also":[198],"achieves":[199],"very":[200],"competitive":[201],"performance":[203],"multiple":[205],"normal":[206],"datasets.":[209]},"counts_by_year":[{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":12},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1}],"updated_date":"2026-03-09T08:58:05.943551","created_date":"2025-10-10T00:00:00"}
