{"id":"https://openalex.org/W4414360078","doi":"https://doi.org/10.24963/ijcai.2025/83","title":"The Devil is in Fine-tuning and Long-tailed Problems: A New Benchmark for Scene Text Detection","display_name":"The Devil is in Fine-tuning and Long-tailed Problems: A New Benchmark for Scene Text Detection","publication_year":2025,"publication_date":"2025-09-01","ids":{"openalex":"https://openalex.org/W4414360078","doi":"https://doi.org/10.24963/ijcai.2025/83"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2025/83","is_oa":false,"landing_page_url":"https://doi.org/10.24963/ijcai.2025/83","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5011162860","display_name":"Tengfei Cao","orcid":"https://orcid.org/0000-0001-9508-2966"},"institutions":[{"id":"https://openalex.org/I4210156404","display_name":"Institute of Information Engineering","ror":"https://ror.org/04r53se39","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210156404"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Tianjiao Cao","raw_affiliation_strings":["Institute of Information Engineering, Chinese Academy of Sciences","School of Cyber Security, University of Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Institute of Information Engineering, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210156404","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Cyber Security, University of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077828258","display_name":"Jiahao Lyu","orcid":"https://orcid.org/0000-0003-2051-8045"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210156404","display_name":"Institute of Information Engineering","ror":"https://ror.org/04r53se39","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210156404"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiahao Lyu","raw_affiliation_strings":["Institute of Information Engineering, Chinese Academy of Sciences","School of Cyber Security, University of Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Institute of Information Engineering, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210156404","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Cyber Security, University of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100660205","display_name":"Wei Zeng","orcid":"https://orcid.org/0000-0002-5600-8824"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210156404","display_name":"Institute of Information Engineering","ror":"https://ror.org/04r53se39","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210156404"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weichao Zeng","raw_affiliation_strings":["Institute of Information Engineering, Chinese Academy of Sciences","School of Cyber Security, University of Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Institute of Information Engineering, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210156404","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Cyber Security, University of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038787960","display_name":"Weimin Mu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210156404","display_name":"Institute of Information Engineering","ror":"https://ror.org/04r53se39","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210156404"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weimin Mu","raw_affiliation_strings":["Institute of Information Engineering, Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Institute of Information Engineering, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210156404","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5048421704","display_name":"Yu Zhou","orcid":"https://orcid.org/0000-0002-3224-0063"},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Zhou","raw_affiliation_strings":["VCIP & TMCC & DISSec, College of Computer Science, Nankai University"],"affiliations":[{"raw_affiliation_string":"VCIP & TMCC & DISSec, College of Computer Science, Nankai University","institution_ids":["https://openalex.org/I205237279"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5011162860"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210156404","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.26736014,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"738","last_page":"746"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9710999727249146,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9484999775886536,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.817300021648407},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7404000163078308},{"id":"https://openalex.org/keywords/replicate","display_name":"Replicate","score":0.6111000180244446},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5529999732971191},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5443999767303467},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5407999753952026},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.47099998593330383}],"concepts":[{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.817300021648407},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7404000163078308},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7210999727249146},{"id":"https://openalex.org/C2781162219","wikidata":"https://www.wikidata.org/wiki/Q26250693","display_name":"Replicate","level":2,"score":0.6111000180244446},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5796999931335449},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5659999847412109},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5529999732971191},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5443999767303467},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5407999753952026},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.47099998593330383},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4521999955177307},{"id":"https://openalex.org/C2780385302","wikidata":"https://www.wikidata.org/wiki/Q367158","display_name":"Protocol (science)","level":3,"score":0.43880000710487366},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.41819998621940613},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.3785000145435333},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3767000138759613},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.3343000113964081},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2786000072956085},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.27799999713897705},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.27129998803138733},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.2696000039577484},{"id":"https://openalex.org/C103088060","wikidata":"https://www.wikidata.org/wiki/Q1062839","display_name":"Error detection and correction","level":2,"score":0.25130000710487366}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.24963/ijcai.2025/83","is_oa":false,"landing_page_url":"https://doi.org/10.24963/ijcai.2025/83","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Scene":[0],"text":[1],"detection":[2],"has":[3],"seen":[4],"the":[5,54,69,79,99,104,119],"emergence":[6],"of":[7,56,82,107,136,155,162],"high-performing":[8],"methods":[9],"that":[10,98],"excel":[11],"on":[12,65],"academic":[13,66],"benchmarks.":[14,67],"However,":[15],"these":[16],"detectors":[17,85],"often":[18],"fail":[19],"to":[20,33,62,78,117,128,157],"replicate":[21],"such":[22],"success":[23],"in":[24,59,72,138],"real-world":[25],"scenarios.":[26],"We":[27,165],"uncover":[28],"two":[29],"key":[30],"factors":[31],"contributing":[32],"this":[34],"discrepancy":[35],"through":[36],"extensive":[37],"experiments.":[38],"First,":[39],"a":[40,112,146,152,159,169,174],"Fine-tuning":[41,120],"Gap,":[42],"where":[43,84],"models":[44],"leverage":[45],"Dataset-Specific":[46],"Optimization":[47],"(DSO)":[48],"paradigm":[49,101],"for":[50,111,177],"one":[51],"domain":[52],"at":[53,183],"cost":[55],"reduced":[57],"effectiveness":[58],"others,":[60],"leads":[61],"inflated":[63],"performances":[64],"Second,":[68],"suboptimal":[70],"performance":[71],"practical":[73],"settings":[74],"is":[75,126,181],"primarily":[76],"attributed":[77],"longtailed":[80],"distribution":[81],"texts,":[83],"struggle":[86],"with":[87],"rare":[88],"and":[89,133],"complex":[90],"categories":[91,132],"as":[92,173],"artistic":[93],"or":[94],"overlapped":[95],"text.":[96],"Given":[97],"DSO":[100],"might":[102],"undermine":[103],"generalization":[105],"ability":[106,156],"models,":[108],"we":[109,144],"advocate":[110],"Joint-Dataset":[113],"Learning":[114],"(JDL)":[115],"protocol":[116],"alleviate":[118],"Gap.":[121],"Additionally,":[122],"an":[123],"error":[124],"analysis":[125],"conducted":[127],"identify":[129],"three":[130],"major":[131],"13":[134],"subcategories":[135],"challenges":[137],"long-tailed":[139,163],"scene":[140],"text,":[141],"upon":[142],"which":[143],"propose":[145],"Long-Tailed":[147],"Benchmark":[148],"(LTB).":[149],"LTB":[150],"facilitates":[151],"comprehensive":[153],"evaluation":[154],"handle":[158],"diverse":[160],"range":[161],"challenges.":[164],"further":[166],"introduce":[167],"MAEDet,":[168],"self-supervised":[170],"learningbased":[171],"method,":[172],"strong":[175],"baseline":[176],"LTB.":[178],"The":[179],"code":[180],"available":[182],"https://github.com/pd162/LTB.":[184]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
