{"id":"https://openalex.org/W7138225450","doi":"https://doi.org/10.1609/aaai.v40i10.37732","title":"One2Seq: One-Token Wise Decoder for Efficient Scene Text Recognition","display_name":"One2Seq: One-Token Wise Decoder for Efficient Scene Text Recognition","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138225450","doi":"https://doi.org/10.1609/aaai.v40i10.37732"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i10.37732","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i10.37732","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i10.37732","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129683206","display_name":"Zhibin Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhibin Ma","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129658661","display_name":"Pengwen Dai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pengwen Dai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101504155","display_name":"Wei Zhuo","orcid":"https://orcid.org/0009-0004-5780-0364"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei Zhuo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5014055218","display_name":"Xugong Qin","orcid":"https://orcid.org/0009-0004-3130-3220"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xugong Qin","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5129683206"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.5585384,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"10","first_page":"7883","last_page":"7891"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9929999709129333,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9929999709129333,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.0007999999797903001,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.000699999975040555,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.7616999745368958},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.5396000146865845},{"id":"https://openalex.org/keywords/hash-function","display_name":"Hash function","score":0.5325000286102295},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5074999928474426},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5042999982833862},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.4426000118255615},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.4041999876499176},{"id":"https://openalex.org/keywords/context-model","display_name":"Context model","score":0.38999998569488525}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8328999876976013},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.7616999745368958},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5533999800682068},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.5396000146865845},{"id":"https://openalex.org/C99138194","wikidata":"https://www.wikidata.org/wiki/Q183427","display_name":"Hash function","level":2,"score":0.5325000286102295},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5074999928474426},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5042999982833862},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.4426000118255615},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.4041999876499176},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.38999998569488525},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.3716000020503998},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.3702999949455261},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.359499990940094},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.35850000381469727},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.35740000009536743},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3271999955177307},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3246000111103058},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.31869998574256897},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.3066999912261963},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.26269999146461487},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.2508000135421753}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i10.37732","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i10.37732","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i10.37732","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i10.37732","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.7465566992759705,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Auto-regressive":[0],"(AR)-based":[1],"decoders,":[2],"owing":[3],"to":[4,68,71,95,117,156,167,189,223],"their":[5,12,50],"flexibility":[6],"in":[7,15,26,52,128],"handling":[8],"variable-length":[9],"outputs":[10],"and":[11,44,89,219],"strong":[13],"capability":[14],"modeling":[16],"character-level":[17],"dependencies,":[18,48],"have":[19],"emerged":[20],"as":[21],"the":[22,27,73,81,97,101,106,119,123,129,140,146,151,174,179,183,195],"predominant":[23],"decoding":[24,42,220],"paradigm":[25,62],"field":[28],"of":[29,108,197],"scene":[30],"text":[31],"recognition":[32,202],"(STR).":[33],"However,":[34],"AR-based":[35,64],"decoders":[36],"suffer":[37],"from":[38],"attention":[39,102],"drift,":[40],"slow":[41],"speed,":[43],"difficulty":[45],"capturing":[46],"global":[47,158,170,198],"restricting":[49],"performance":[51],"various":[53],"scenarios.":[54],"In":[55],"this":[56,135],"paper,":[57],"we":[58,79,112,162],"propose":[59,163],"a":[60,85,91],"novel":[61],"for":[63,160],"decoding,":[65,98,161],"called":[66],"One-Token":[67,92],"Sequence":[69],"(One2Seq),":[70],"address":[72],"above":[74],"issues.":[75],"Unlike":[76],"existing":[77,224],"methods,":[78],"encode":[80],"semantic":[82,109,142],"features":[83,172,193],"into":[84,173],"single":[86],"context":[87,130,175,181],"token":[88],"design":[90],"Wise":[93],"Decoder":[94],"perform":[96],"which":[99],"alleviates":[100],"drift":[103],"caused":[104],"by":[105],"accumulation":[107],"information.":[110],"Moreover,":[111],"proposed":[113],"Positioal-aware":[114],"Hash":[115],"Embedding":[116],"embed":[118],"decoded":[120,141],"characters,":[121],"ensuring":[122],"order":[124],"information":[125,143,159],"is":[126],"obtained":[127],"token.":[131,176],"By":[132],"continuously":[133],"updating":[134],"token,":[136,182],"One2Seq":[137,212],"fully":[138],"leverages":[139],"while":[144],"avoiding":[145],"computational":[147],"overhead":[148],"associated":[149],"with":[150,178,208],"growing":[152],"query":[153],"sequence.":[154],"Furthermore,":[155],"leverage":[157],"Dynamic":[164],"Global":[165],"Infusion":[166],"dynamically":[168],"integrates":[169],"visual":[171],"Equipped":[177],"enriched":[180],"model":[184],"has":[185],"an":[186],"enhanced":[187],"ability":[188],"extract":[190],"discriminative":[191],"local":[192],"under":[194],"guidance":[196],"context,":[199],"thereby":[200],"enhancing":[201],"accuracy.":[203],"Extensive":[204],"experiments":[205],"reveal":[206],"that,":[207],"its":[209],"ingenious":[210],"design,":[211],"exhibits":[213],"marked":[214],"superiority":[215],"on":[216],"both":[217],"accuracy":[218],"speed":[221],"compared":[222],"STR":[225],"models.":[226]},"counts_by_year":[],"updated_date":"2026-03-18T06:31:55.123368","created_date":"2026-03-18T00:00:00"}
