{"id":"https://openalex.org/W4283821822","doi":"https://doi.org/10.1145/3503161.3547784","title":"Reading and Writing: Discriminative and Generative Modeling for Self-Supervised Text Recognition","display_name":"Reading and Writing: Discriminative and Generative Modeling for Self-Supervised Text Recognition","publication_year":2022,"publication_date":"2022-10-10","ids":{"openalex":"https://openalex.org/W4283821822","doi":"https://doi.org/10.1145/3503161.3547784"},"language":"en","primary_location":{"id":"doi:10.1145/3503161.3547784","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3503161.3547784","pdf_url":null,"source":{"id":"https://openalex.org/S4363608757","display_name":"Proceedings of the 30th ACM International Conference on Multimedia","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5062346032","display_name":"Mingkun Yang","orcid":"https://orcid.org/0000-0002-9210-4098"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Mingkun Yang","raw_affiliation_strings":["Huazhong University of Science and Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030174783","display_name":"Minghui Liao","orcid":"https://orcid.org/0000-0002-2583-4314"},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Minghui Liao","raw_affiliation_strings":["Huawei Cloud, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Huawei Cloud, Shenzhen, China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055229650","display_name":"Pu Lu","orcid":null},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pu Lu","raw_affiliation_strings":["Huawei Cloud, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Huawei Cloud, Shenzhen, China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100378720","display_name":"Jing Wang","orcid":"https://orcid.org/0000-0003-4567-3869"},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jing Wang","raw_affiliation_strings":["Huawei Cloud, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Huawei Cloud, Shenzhen, China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003746644","display_name":"Shenggao Zhu","orcid":"https://orcid.org/0000-0002-3254-0058"},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shenggao Zhu","raw_affiliation_strings":["Huawei Cloud, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Huawei Cloud, Shenzhen, China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022317736","display_name":"Hualin Luo","orcid":null},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hualin Luo","raw_affiliation_strings":["Huawei Cloud, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Huawei Cloud, Shenzhen, China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100393506","display_name":"Qi Tian","orcid":"https://orcid.org/0000-0002-7252-5047"},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qi Tian","raw_affiliation_strings":["Huawei Cloud, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Huawei Cloud, Shenzhen, China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103173971","display_name":"Xiang Bai","orcid":"https://orcid.org/0000-0003-0006-1287"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiang Bai","raw_affiliation_strings":["Huazhong University of Science and Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5062346032"],"corresponding_institution_ids":["https://openalex.org/I47720641"],"apc_list":null,"apc_paid":null,"fwci":3.7754,"has_fulltext":false,"cited_by_count":64,"citation_normalized_percentile":{"value":0.9516612,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"4214","last_page":"4223"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9922000169754028,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9847999811172485,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7967163324356079},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.7285412549972534},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7049651145935059},{"id":"https://openalex.org/keywords/text-recognition","display_name":"Text recognition","score":0.652060329914093},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5092000365257263},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5027873516082764},{"id":"https://openalex.org/keywords/reading","display_name":"Reading (process)","score":0.4736952781677246},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.46061989665031433},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.45905694365501404},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.43484798073768616},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.4336654841899872},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.35445883870124817},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.09123861789703369}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7967163324356079},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.7285412549972534},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7049651145935059},{"id":"https://openalex.org/C2983812711","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Text recognition","level":3,"score":0.652060329914093},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5092000365257263},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5027873516082764},{"id":"https://openalex.org/C554936623","wikidata":"https://www.wikidata.org/wiki/Q199657","display_name":"Reading (process)","level":2,"score":0.4736952781677246},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.46061989665031433},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.45905694365501404},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.43484798073768616},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.4336654841899872},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.35445883870124817},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.09123861789703369},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3503161.3547784","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3503161.3547784","pdf_url":null,"source":{"id":"https://openalex.org/S4363608757","display_name":"Proceedings of the 30th ACM International Conference on Multimedia","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.6299999952316284}],"awards":[{"id":"https://openalex.org/G3418581704","display_name":null,"funder_award_id":"61733007","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W1885185971","https://openalex.org/W1922126009","https://openalex.org/W1971822075","https://openalex.org/W2050995497","https://openalex.org/W2133665775","https://openalex.org/W2138621090","https://openalex.org/W2144796873","https://openalex.org/W2146835493","https://openalex.org/W2152928267","https://openalex.org/W2161969291","https://openalex.org/W2194187530","https://openalex.org/W2343052201","https://openalex.org/W2532759528","https://openalex.org/W2740767790","https://openalex.org/W2784050770","https://openalex.org/W2810983211","https://openalex.org/W2914492226","https://openalex.org/W2962790387","https://openalex.org/W2963233387","https://openalex.org/W2963470893","https://openalex.org/W2964312704","https://openalex.org/W2970910956","https://openalex.org/W2978036638","https://openalex.org/W3005436539","https://openalex.org/W3034414401","https://openalex.org/W3034447740","https://openalex.org/W3035449864","https://openalex.org/W3102695566","https://openalex.org/W3110398855","https://openalex.org/W3145450063","https://openalex.org/W3168428721","https://openalex.org/W3175618949","https://openalex.org/W3175855397","https://openalex.org/W3179897446","https://openalex.org/W3181159501","https://openalex.org/W3181186176","https://openalex.org/W3202415716","https://openalex.org/W3202912918","https://openalex.org/W6600339457"],"related_works":["https://openalex.org/W4396941953","https://openalex.org/W2093104230","https://openalex.org/W2987280934","https://openalex.org/W4390874210","https://openalex.org/W4384918963","https://openalex.org/W4365211920","https://openalex.org/W2128027845","https://openalex.org/W3014948380","https://openalex.org/W4386184937","https://openalex.org/W4394728283"],"abstract_inverted_index":{"Existing":[0],"text":[1,44,49,69,117,134,143,162,170,176,181],"recognition":[2,45,50,135,163,171,182],"methods":[3,51,164,183],"usually":[4],"need":[5],"large-scale":[6],"training":[7,15],"data.":[8],"Most":[9],"of":[10,21,42,67,116,124,141],"them":[11],"rely":[12],"on":[13,167,187],"synthetic":[14,33],"data":[16,34],"due":[17],"to":[18,53,78,89,112,136,148,204],"the":[19,32,40,43,65,68,73,80,114,121,138,142,149],"lack":[20],"annotated":[22],"real":[23,36,56],"images.":[24,70],"However,":[25],"there":[26],"is":[27,110,130,146],"a":[28],"domain":[29],"gap":[30],"between":[31],"and":[35,85,92,98],"data,":[37],"which":[38,62,119,145],"limits":[39],"performance":[41,210],"models.":[46],"Recent":[47],"self-supervised":[48,104,161],"attempted":[52],"utilize":[54],"unlabeled":[55],"images":[57],"by":[58,72,94,165,184],"introducing":[59],"contrastive":[60,96,107],"learning,":[61],"mainly":[63],"learns":[64],"discrimination":[66,91,115],"Inspired":[71],"observation":[74],"that":[75,156,196],"humans":[76],"learn":[77,90,113,137],"recognize":[79],"texts":[81],"through":[82],"both":[83],"reading":[84,122],"writing,":[86],"we":[87],"propose":[88],"generation":[93,140],"integrating":[95],"learning":[97,108],"masked":[99,127],"image":[100,128],"modeling":[101,129],"in":[102],"our":[103,157,174,197],"method.":[105],"The":[106,152],"branch":[109],"adopted":[111],"images,":[118,144],"imitates":[120],"behavior":[123],"humans.":[125],"Meanwhile,":[126],"firstly":[131],"introduced":[132],"for":[133],"context":[139],"similar":[147,190],"writing":[150],"behavior.":[151],"experimental":[153],"results":[154],"show":[155],"method":[158],"outperforms":[159],"previous":[160,179],"10.2%-20.2%":[166],"irregular":[168],"scene":[169],"datasets.":[172],"Moreover,":[173],"proposed":[175],"recognizer":[177],"exceeds":[178],"state-of-the-art":[180],"averagely":[185],"5.3%":[186],"11benchmarks,":[188],"with":[189,208],"model":[191,199],"size.":[192],"We":[193],"also":[194],"demonstrate":[195],"pre-trained":[198],"can":[200],"be":[201],"easily":[202],"applied":[203],"other":[205],"text-related":[206],"tasks":[207],"obvious":[209],"gain.":[211]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":18},{"year":2024,"cited_by_count":23},{"year":2023,"cited_by_count":22}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
