{"id":"https://openalex.org/W4375869284","doi":"https://doi.org/10.1109/icassp49357.2023.10095180","title":"Difficulty-Aware Data Augmentor for Scene Text Recognition","display_name":"Difficulty-Aware Data Augmentor for Scene Text Recognition","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4375869284","doi":"https://doi.org/10.1109/icassp49357.2023.10095180"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10095180","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10095180","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102398518","display_name":"Guanghao Meng","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]},{"id":"https://openalex.org/I4210114105","display_name":"Tsinghua\u2013Berkeley Shenzhen Institute","ror":"https://ror.org/02hhwwz98","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210114105","https://openalex.org/I95457486","https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Guanghao Meng","raw_affiliation_strings":["Tsinghua University,Tsinghua Shenzhen International Graduate School","Peng Cheng Laboratory, Shenzhen, China","Tsinghua Shenzhen International Graduate School, Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Tsinghua Shenzhen International Graduate School","institution_ids":["https://openalex.org/I4210114105","https://openalex.org/I99065089"]},{"raw_affiliation_string":"Peng Cheng Laboratory, Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]},{"raw_affiliation_string":"Tsinghua Shenzhen International Graduate School, Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023762528","display_name":"Tao Dai","orcid":"https://orcid.org/0000-0003-0594-6404"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tao Dai","raw_affiliation_strings":["Shenzhen University,College of Computer Science and Software Engineering","College of Computer Science and Software Engineering, Shenzhen University"],"affiliations":[{"raw_affiliation_string":"Shenzhen University,College of Computer Science and Software Engineering","institution_ids":["https://openalex.org/I180726961"]},{"raw_affiliation_string":"College of Computer Science and Software Engineering, Shenzhen University","institution_ids":["https://openalex.org/I180726961"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100427338","display_name":"Bin Chen","orcid":"https://orcid.org/0000-0002-4798-230X"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bin Chen","raw_affiliation_strings":["Harbin Institute of Technology,Department of Computer Science and Technology,Shenzhen","Department of Computer Science and Technology, Harbin Institute of Technology, Shenzhen"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology,Department of Computer Science and Technology,Shenzhen","institution_ids":["https://openalex.org/I204983213"]},{"raw_affiliation_string":"Department of Computer Science and Technology, Harbin Institute of Technology, Shenzhen","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016301762","display_name":"Naiqi Li","orcid":"https://orcid.org/0000-0002-6472-0678"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]},{"id":"https://openalex.org/I4210114105","display_name":"Tsinghua\u2013Berkeley Shenzhen Institute","ror":"https://ror.org/02hhwwz98","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210114105","https://openalex.org/I95457486","https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Naiqi Li","raw_affiliation_strings":["Tsinghua University,Tsinghua Shenzhen International Graduate School","Tsinghua Shenzhen International Graduate School, Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Tsinghua Shenzhen International Graduate School","institution_ids":["https://openalex.org/I4210114105","https://openalex.org/I99065089"]},{"raw_affiliation_string":"Tsinghua Shenzhen International Graduate School, Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101626204","display_name":"Yong Jiang","orcid":"https://orcid.org/0000-0002-4260-1395"},"institutions":[{"id":"https://openalex.org/I4210114105","display_name":"Tsinghua\u2013Berkeley Shenzhen Institute","ror":"https://ror.org/02hhwwz98","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210114105","https://openalex.org/I95457486","https://openalex.org/I99065089"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yong Jiang","raw_affiliation_strings":["Tsinghua University,Tsinghua Shenzhen International Graduate School","Peng Cheng Laboratory, Shenzhen, China","Tsinghua Shenzhen International Graduate School, Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Tsinghua Shenzhen International Graduate School","institution_ids":["https://openalex.org/I4210114105","https://openalex.org/I99065089"]},{"raw_affiliation_string":"Peng Cheng Laboratory, Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]},{"raw_affiliation_string":"Tsinghua Shenzhen International Graduate School, Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034104790","display_name":"Shu\u2010Tao Xia","orcid":"https://orcid.org/0000-0002-8639-982X"},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]},{"id":"https://openalex.org/I4210114105","display_name":"Tsinghua\u2013Berkeley Shenzhen Institute","ror":"https://ror.org/02hhwwz98","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210114105","https://openalex.org/I95457486","https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shu-Tao Xia","raw_affiliation_strings":["Tsinghua University,Tsinghua Shenzhen International Graduate School","Tsinghua Shenzhen International Graduate School, Tsinghua University","Peng Cheng Laboratory, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Tsinghua Shenzhen International Graduate School","institution_ids":["https://openalex.org/I4210114105","https://openalex.org/I99065089"]},{"raw_affiliation_string":"Tsinghua Shenzhen International Graduate School, Tsinghua University","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Peng Cheng Laboratory, Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5102398518"],"corresponding_institution_ids":["https://openalex.org/I4210114105","https://openalex.org/I4210136793","https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.04053301,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.9915000200271606,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9868000149726868,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.863975465297699},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5868421792984009},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.5506606698036194},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5381888151168823},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5290840268135071},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.4538407027721405},{"id":"https://openalex.org/keywords/deep-neural-networks","display_name":"Deep neural networks","score":0.449310302734375},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4256833493709564},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3655283451080322},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3217284083366394}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.863975465297699},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5868421792984009},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.5506606698036194},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5381888151168823},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5290840268135071},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.4538407027721405},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.449310302734375},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4256833493709564},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3655283451080322},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3217284083366394},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10095180","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10095180","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.5199999809265137}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W6908809","https://openalex.org/W1491389626","https://openalex.org/W1971822075","https://openalex.org/W1998042868","https://openalex.org/W2008806374","https://openalex.org/W2049951199","https://openalex.org/W2053317383","https://openalex.org/W2099247484","https://openalex.org/W2144554289","https://openalex.org/W2146835493","https://openalex.org/W2163605009","https://openalex.org/W2343052201","https://openalex.org/W2810983211","https://openalex.org/W2965066169","https://openalex.org/W2969649120","https://openalex.org/W3034447740","https://openalex.org/W3035449864","https://openalex.org/W3035682737","https://openalex.org/W3042760913","https://openalex.org/W3130325649","https://openalex.org/W3134064484","https://openalex.org/W3160318025","https://openalex.org/W3185287398","https://openalex.org/W3204479434","https://openalex.org/W3214063384","https://openalex.org/W4288021837","https://openalex.org/W6600284362","https://openalex.org/W6629590909","https://openalex.org/W6681875759","https://openalex.org/W6684191040","https://openalex.org/W6770133954","https://openalex.org/W6795955391","https://openalex.org/W6799083703","https://openalex.org/W6803253459"],"related_works":["https://openalex.org/W2391251536","https://openalex.org/W2362198218","https://openalex.org/W2786391746","https://openalex.org/W4381430104","https://openalex.org/W2995102745","https://openalex.org/W4226059458","https://openalex.org/W2914559142","https://openalex.org/W3000197790","https://openalex.org/W1990237101","https://openalex.org/W3196471634"],"abstract_inverted_index":{"Deep":[0],"neural":[1],"network":[2],"(DNN)":[3],"based":[4],"scene":[5,92,144],"text":[6,93,145],"recognition":[7,146],"(STR)":[8],"methods":[9,34,51,131],"usually":[10],"require":[11],"a":[12,85,125],"large":[13],"amount":[14],"of":[15,45,64,69,77,100,105,129,157],"annotated":[16],"data":[17,32,72,88,119],"for":[18,91,132],"training,":[19],"which":[20,95],"is":[21],"time-consuming":[22],"and":[23,58,102,134],"cost-expensive":[24],"in":[25,56],"practice.":[26],"To":[27],"address":[28],"this":[29,81],"issue,":[30],"many":[31],"augmentation":[33,89,120,130,139,151],"have":[35],"been":[36],"developed":[37],"to":[38],"train":[39],"recognizers":[40],"by":[41,116],"improving":[42],"the":[43,53,62,67,70,98,103,112,155],"diversity":[44],"training":[46],"samples.":[47],"However,":[48],"most":[49],"existing":[50],"neglect":[52],"difficulty":[54,99],"inherent":[55],"samples,":[57],"easily":[59],"suffer":[60],"from":[61,75],"problem":[63],"over-diversity,":[65],"i.e.,":[66],"distribution":[68],"augmented":[71],"significantly":[73,153],"deviates":[74],"that":[76,149],"clean":[78],"data.":[79],"In":[80],"paper,":[82],"we":[83,123],"propose":[84],"novel":[86],"difficulty-aware":[87],"framework":[90,109,152],"recognition,":[94],"jointly":[96],"considers":[97],"samples":[101],"strength":[104],"augmentations.":[106],"Specifically,":[107],"our":[108,138,150],"first":[110],"predicts":[111],"sample":[113],"difficulty,":[114],"followed":[115],"an":[117],"adaptive":[118],"strategy.":[121],"Furthermore,":[122],"build":[124],"more":[126],"diverse":[127],"set":[128],"STR":[133],"integrate":[135],"it":[136],"into":[137],"framework.":[140],"Extensive":[141],"experiments":[142],"on":[143],"benchmarks":[147],"show":[148],"improves":[154],"performance":[156],"recognizers.":[158]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
