{"id":"https://openalex.org/W4403780826","doi":"https://doi.org/10.1145/3664647.3681551","title":"Trust Prophet or Not? Taking a Further Verification Step toward Accurate Scene Text Recognition","display_name":"Trust Prophet or Not? Taking a Further Verification Step toward Accurate Scene Text Recognition","publication_year":2024,"publication_date":"2024-10-26","ids":{"openalex":"https://openalex.org/W4403780826","doi":"https://doi.org/10.1145/3664647.3681551"},"language":"en","primary_location":{"id":"doi:10.1145/3664647.3681551","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3664647.3681551","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3664647.3681551?download=true","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3664647.3681551?download=true","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100931685","display_name":"Anna Zhu","orcid":"https://orcid.org/0000-0001-6965-9506"},"institutions":[{"id":"https://openalex.org/I196699116","display_name":"Wuhan University of Technology","ror":"https://ror.org/03fe7t173","country_code":"CN","type":"education","lineage":["https://openalex.org/I196699116"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Anna Zhu","raw_affiliation_strings":["Wuhan University of Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"Wuhan University of Technology, Wuhan, China","institution_ids":["https://openalex.org/I196699116"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068580082","display_name":"Ke Xiao","orcid":"https://orcid.org/0009-0003-3959-4520"},"institutions":[{"id":"https://openalex.org/I196699116","display_name":"Wuhan University of Technology","ror":"https://ror.org/03fe7t173","country_code":"CN","type":"education","lineage":["https://openalex.org/I196699116"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ke Xiao","raw_affiliation_strings":["Wuhan University of Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"Wuhan University of Technology, Wuhan, China","institution_ids":["https://openalex.org/I196699116"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101758086","display_name":"Bo Zhou","orcid":"https://orcid.org/0000-0001-6682-0624"},"institutions":[{"id":"https://openalex.org/I196699116","display_name":"Wuhan University of Technology","ror":"https://ror.org/03fe7t173","country_code":"CN","type":"education","lineage":["https://openalex.org/I196699116"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Zhou","raw_affiliation_strings":["Wuhan University of Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"Wuhan University of Technology, Wuhan, China","institution_ids":["https://openalex.org/I196699116"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049658962","display_name":"Runmin Wang","orcid":"https://orcid.org/0000-0001-9687-9918"},"institutions":[{"id":"https://openalex.org/I173759888","display_name":"Hunan Normal University","ror":"https://ror.org/053w1zy07","country_code":"CN","type":"education","lineage":["https://openalex.org/I173759888"]},{"id":"https://openalex.org/I3130607311","display_name":"Changsha Normal University","ror":"https://ror.org/02sqk3z62","country_code":"CN","type":"education","lineage":["https://openalex.org/I3130607311"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Runmin Wang","raw_affiliation_strings":["Hunan Normal University, Changsha, China"],"affiliations":[{"raw_affiliation_string":"Hunan Normal University, Changsha, China","institution_ids":["https://openalex.org/I3130607311","https://openalex.org/I173759888"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100931685"],"corresponding_institution_ids":["https://openalex.org/I196699116"],"apc_list":null,"apc_paid":null,"fwci":1.2231,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.81077188,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1741","last_page":"1750"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9872000217437744,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7120167016983032},{"id":"https://openalex.org/keywords/text-recognition","display_name":"Text recognition","score":0.6126562356948853},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.459127277135849},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.40069806575775146},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.38166195154190063},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3577656149864197},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.14290454983711243}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7120167016983032},{"id":"https://openalex.org/C2983812711","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Text recognition","level":3,"score":0.6126562356948853},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.459127277135849},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.40069806575775146},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.38166195154190063},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3577656149864197},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.14290454983711243}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3664647.3681551","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3664647.3681551","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3664647.3681551?download=true","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3664647.3681551","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3664647.3681551","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3664647.3681551?download=true","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3910605392","display_name":null,"funder_award_id":"NSCQ-","funder_id":"https://openalex.org/F4320323172","funder_display_name":"Natural Science Foundation of Chongqing"},{"id":"https://openalex.org/G4427435602","display_name":null,"funder_award_id":"CSTB2023NSCQ","funder_id":"https://openalex.org/F4320323172","funder_display_name":"Natural Science Foundation of Chongqing"}],"funders":[{"id":"https://openalex.org/F4320322186","display_name":"Natural Science Foundation of Hubei Province","ror":null},{"id":"https://openalex.org/F4320323172","display_name":"Natural Science Foundation of Chongqing","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320327639","display_name":"Centre Scientifique et Technique du B\u00e2timent","ror":"https://ror.org/02fsd1928"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4403780826.pdf","grobid_xml":"https://content.openalex.org/works/W4403780826.grobid-xml"},"referenced_works_count":18,"referenced_works":["https://openalex.org/W569478347","https://openalex.org/W1971822075","https://openalex.org/W2194187530","https://openalex.org/W2560662850","https://openalex.org/W2810983211","https://openalex.org/W2875814315","https://openalex.org/W2962739339","https://openalex.org/W2978036638","https://openalex.org/W2998382406","https://openalex.org/W3157598734","https://openalex.org/W3202415716","https://openalex.org/W3207095487","https://openalex.org/W4225562651","https://openalex.org/W4229030834","https://openalex.org/W4285531802","https://openalex.org/W4304084214","https://openalex.org/W4385767840","https://openalex.org/W4391650137"],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"Inducing":[0],"linguistic":[1,46,195],"knowledge":[2,47],"for":[3,16,31,133,176,210],"scene":[4],"text":[5],"recognition":[6],"(STR)":[7],"is":[8],"a":[9,73,108,158,206],"new":[10],"trend":[11],"that":[12,217],"could":[13,198],"provide":[14],"semantics":[15],"performance":[17],"boost.":[18],"However,":[19],"most":[20],"autoregressive":[21],"STR":[22,76],"models":[23,43],"optimize":[24],"one-step":[25],"ahead":[26],"prediction":[27,131,178],"(i.e.,":[28],"1-gram":[29],"prediction)":[30],"character":[32,136],"sequence,":[33],"which":[34,59,197],"only":[35,44],"utilizes":[36],"the":[37,50,55,64,87,92,98,111,120,125,130,134,163,229],"previous":[38,99,121],"semantic":[39,122,165,173],"context.":[40],"Most":[41],"non-autoregressive":[42],"apply":[45],"individually":[48],"on":[49,97,222],"output":[51],"sequence":[52],"to":[53,89,113,184],"refine":[54],"results":[56,117,132],"in":[57,86,180],"parallel,":[58],"do":[60],"not":[61,145],"fully":[62],"utilize":[63],"visual":[65,170,192],"clues":[66],"concurrently.":[67],"In":[68],"this":[69],"paper,":[70],"we":[71,143,156,204],"propose":[72,157,205],"novel":[74],"language-based":[75],"model,":[77],"called":[78],"ProphetSTR.":[79],"It":[80,105,182],"adopts":[81],"an":[82],"n-stream":[83],"attention":[84],"mechanism":[85],"decoder":[88],"simultaneously":[90,175],"predict":[91,114],"next":[93],"n":[94],"characters":[95],"based":[96],"predictions":[100],"at":[101,137],"each":[102],"time":[103,139],"step.":[104],"behaves":[106],"like":[107],"prophet,":[109],"encouraging":[110],"model":[112],"more":[115,200],"accurate":[116],"by":[118],"utilizing":[119],"information":[123],"and":[124,167,171,189,194],"near":[126],"future":[127],"clues.":[128],"If":[129],"same":[135],"successive":[138],"steps":[140],"are":[141,152],"inconsistent,":[142],"should":[144],"trust":[146],"any":[147],"of":[148,231],"them.":[149],"Otherwise,":[150],"they":[151],"reliable":[153,201],"predictions.":[154],"Therefore,":[155],"multi-modality":[159],"verification":[160],"module,":[161],"masking":[162],"unreliable":[164],"features":[166],"inputting":[168],"with":[169],"trusted":[172],"ones":[174],"masked":[177],"recovery":[179],"parallel.":[181],"learns":[183],"align":[185],"different":[186],"modalities":[187],"implicitly":[188],"considers":[190],"both":[191],"context":[193],"knowledge,":[196],"generate":[199],"results.":[202],"Furthermore,":[203],"multi-scale":[207],"weight-sharing":[208],"encoder":[209],"multi-granularity":[211],"image":[212],"representation.":[213],"Extensive":[214],"experiments":[215],"demonstrate":[216],"ProphetSTR":[218],"achieves":[219],"state-of-the-art":[220],"performances":[221],"many":[223],"benchmarks.":[224],"Further":[225],"ablative":[226],"studies":[227],"prove":[228],"effectiveness":[230],"our":[232],"proposed":[233],"components.":[234]},"counts_by_year":[{"year":2025,"cited_by_count":5}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
