{"id":"https://openalex.org/W4391931509","doi":"https://doi.org/10.1145/3646551","title":"Multimodal Visual-Semantic Representations Learning for Scene Text Recognition","display_name":"Multimodal Visual-Semantic Representations Learning for Scene Text Recognition","publication_year":2024,"publication_date":"2024-02-19","ids":{"openalex":"https://openalex.org/W4391931509","doi":"https://doi.org/10.1145/3646551"},"language":"en","primary_location":{"id":"doi:10.1145/3646551","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3646551","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3646551","source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3646551","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101420812","display_name":"Xinjian Gao","orcid":"https://orcid.org/0000-0002-9795-5908"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xinjian Gao","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China"],"raw_orcid":"https://orcid.org/0000-0002-9795-5908","affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102006884","display_name":"Ye Pang","orcid":"https://orcid.org/0009-0005-6393-8930"},"institutions":[{"id":"https://openalex.org/I4401726822","display_name":"Ping An (China)","ror":"https://ror.org/004yv2z91","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726822"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ye Pang","raw_affiliation_strings":["Ping An Technology Co., Ltd, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0005-6393-8930","affiliations":[{"raw_affiliation_string":"Ping An Technology Co., Ltd, Beijing, China","institution_ids":["https://openalex.org/I4401726822"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101979565","display_name":"Yuyu Liu","orcid":"https://orcid.org/0009-0005-1541-5000"},"institutions":[{"id":"https://openalex.org/I4401726822","display_name":"Ping An (China)","ror":"https://ror.org/004yv2z91","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726822"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuyu Liu","raw_affiliation_strings":["Ping An Technology Co., Ltd, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0005-1541-5000","affiliations":[{"raw_affiliation_string":"Ping An Technology Co., Ltd, Beijing, China","institution_ids":["https://openalex.org/I4401726822"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034261147","display_name":"Maokun Han","orcid":"https://orcid.org/0009-0007-9335-4025"},"institutions":[{"id":"https://openalex.org/I4401726822","display_name":"Ping An (China)","ror":"https://ror.org/004yv2z91","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726822"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Maokun Han","raw_affiliation_strings":["Ping An Technology Co., Ltd, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0007-9335-4025","affiliations":[{"raw_affiliation_string":"Ping An Technology Co., Ltd, Beijing, China","institution_ids":["https://openalex.org/I4401726822"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048818071","display_name":"Jun Yu","orcid":"https://orcid.org/0000-0002-3197-8103"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Yu","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China"],"raw_orcid":"https://orcid.org/0000-0002-3197-8103","affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100392327","display_name":"Wei Wang","orcid":"https://orcid.org/0009-0003-0550-497X"},"institutions":[{"id":"https://openalex.org/I4401726822","display_name":"Ping An (China)","ror":"https://ror.org/004yv2z91","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726822"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Wang","raw_affiliation_strings":["Ping An Technology Co., Ltd, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0003-0550-497X","affiliations":[{"raw_affiliation_string":"Ping An Technology Co., Ltd, Beijing, China","institution_ids":["https://openalex.org/I4401726822"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5090416286","display_name":"Yuanxu Chen","orcid":"https://orcid.org/0009-0005-9132-3837"},"institutions":[{"id":"https://openalex.org/I4401726822","display_name":"Ping An (China)","ror":"https://ror.org/004yv2z91","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726822"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuanxu Chen","raw_affiliation_strings":["Ping An Technology Co., Ltd, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0005-9132-3837","affiliations":[{"raw_affiliation_string":"Ping An Technology Co., Ltd, Beijing, China","institution_ids":["https://openalex.org/I4401726822"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5101420812"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":0.4762,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.59224854,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":"20","issue":"7","first_page":"1","last_page":"18"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9848999977111816,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9847999811172485,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5872253179550171},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5653737187385559},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5095089077949524}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5872253179550171},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5653737187385559},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5095089077949524}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3646551","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3646551","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3646551","source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3646551","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3646551","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3646551","source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.7200000286102295}],"awards":[{"id":"https://openalex.org/G450612882","display_name":null,"funder_award_id":"62276242","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320325599","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4391931509.pdf","grobid_xml":"https://content.openalex.org/works/W4391931509.grobid-xml"},"referenced_works_count":58,"referenced_works":["https://openalex.org/W1491389626","https://openalex.org/W1922126009","https://openalex.org/W1971822075","https://openalex.org/W1998042868","https://openalex.org/W2067953788","https://openalex.org/W2194187530","https://openalex.org/W2750938222","https://openalex.org/W2752225195","https://openalex.org/W2810983211","https://openalex.org/W2875814315","https://openalex.org/W2891021031","https://openalex.org/W2896457183","https://openalex.org/W2908791737","https://openalex.org/W2953388933","https://openalex.org/W2963233387","https://openalex.org/W2963705779","https://openalex.org/W2963712589","https://openalex.org/W2965066169","https://openalex.org/W2979371747","https://openalex.org/W2988098900","https://openalex.org/W2996609910","https://openalex.org/W2997351497","https://openalex.org/W2997749585","https://openalex.org/W2998382406","https://openalex.org/W3004846386","https://openalex.org/W3030971704","https://openalex.org/W3034414401","https://openalex.org/W3035449864","https://openalex.org/W3035679705","https://openalex.org/W3040521064","https://openalex.org/W3042760913","https://openalex.org/W3043311956","https://openalex.org/W3095672411","https://openalex.org/W3096966883","https://openalex.org/W3106271744","https://openalex.org/W3110267192","https://openalex.org/W3145450063","https://openalex.org/W3170863103","https://openalex.org/W3173990630","https://openalex.org/W3175618949","https://openalex.org/W3181186176","https://openalex.org/W3185705915","https://openalex.org/W3202415716","https://openalex.org/W3202912918","https://openalex.org/W3203487993","https://openalex.org/W3204479434","https://openalex.org/W3213836217","https://openalex.org/W3216857888","https://openalex.org/W4280634601","https://openalex.org/W4283802017","https://openalex.org/W4292779060","https://openalex.org/W4301185181","https://openalex.org/W4313156423","https://openalex.org/W6744310024","https://openalex.org/W6778883912","https://openalex.org/W6796761347","https://openalex.org/W6803870738","https://openalex.org/W6804160461"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2382290278","https://openalex.org/W2478288626","https://openalex.org/W4391913857","https://openalex.org/W2350741829","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Scene":[0,114],"Text":[1,100,115],"Recognition":[2,101],"(STR),":[3],"the":[4,35,39,44,54,73,87,108,133,141,145,149,154,162,172,184],"critical":[5],"step":[6],"in":[7,14,48,57,71,90],"OCR":[8],"systems,":[9],"has":[10,27,75,132],"attracted":[11],"much":[12],"attention":[13],"computer":[15],"vision.":[16],"Recent":[17],"research":[18],"on":[19,195],"modeling":[20],"textual":[21,176],"semantics":[22,138,177],"with":[23],"Language":[24],"Model":[25,46],"(LM)":[26],"witnessed":[28],"remarkable":[29],"progress.":[30],"However,":[31],"LM":[32,181],"only":[33],"optimizes":[34],"joint":[36],"probability":[37],"of":[38],"estimated":[40],"characters":[41],"generated":[42],"from":[43,148,178],"Vision":[45],"(VM)":[47],"a":[49,121],"single":[50],"language":[51,126],"modality,":[52],"ignoring":[53],"visual-semantic":[55,110],"relations":[56],"different":[58],"modalities.":[59],"Thus,":[60],"LM-based":[61],"methods":[62],"can":[63],"hardly":[64],"generalize":[65],"well":[66],"to":[67,104,135,143,170,182],"some":[68],"challenging":[69],"conditions,":[70],"which":[72],"text":[74,151],"weak":[76],"or":[77],"multiple":[78],"semantics,":[79],"arbitrary":[80],"shape,":[81],"and":[82,106,125,131,159,175,180],"so":[83],"on.":[84],"To":[85],"migrate":[86],"above":[88],"issue,":[89],"this":[91],"paper,":[92],"we":[93],"propose":[94],"Multimodal":[95],"Visual-Semantic":[96],"Representations":[97],"Learning":[98],"for":[99,112],"Network":[102],"(MVSTRN)":[103],"reason":[105,136],"combine":[107,171],"multimodal":[109,164,173],"information":[111],"accurate":[113],"Recognition.":[116],"Specifically,":[117],"our":[118,190],"MVSTRN":[119,191],"builds":[120],"bridge":[122],"between":[123,157],"vision":[124,158],"through":[127],"its":[128],"unified":[129],"architecture":[130],"ability":[134],"visual":[137,174],"by":[139],"guiding":[140],"network":[142],"reconstruct":[144],"original":[146],"image":[147],"latent":[150],"representation,":[152],"breaking":[153],"structural":[155],"gap":[156],"language.":[160],"Finally,":[161],"tailored":[163],"Fusion":[165],"(MMF)":[166],"module":[167],"is":[168],"motivated":[169],"VM":[179],"make":[183],"final":[185],"predictions.":[186],"Extensive":[187],"experiments":[188],"demonstrate":[189],"achieves":[192],"state-of-the-art":[193],"performance":[194],"several":[196],"benchmarks.":[197]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
