{"id":"https://openalex.org/W4408352187","doi":"https://doi.org/10.1109/icassp49660.2025.10888943","title":"Char-SAM: Turning Segment Anything Model into Scene Text Segmentation Annotator with Character-level Visual Prompts","display_name":"Char-SAM: Turning Segment Anything Model into Scene Text Segmentation Annotator with Character-level Visual Prompts","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4408352187","doi":"https://doi.org/10.1109/icassp49660.2025.10888943"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10888943","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10888943","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041031140","display_name":"Enze Xie","orcid":"https://orcid.org/0000-0001-6890-1049"},"institutions":[{"id":"https://openalex.org/I4210156404","display_name":"Institute of Information Engineering","ror":"https://ror.org/04r53se39","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210156404"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Enze Xie","raw_affiliation_strings":["University of Chinese Academy of Sciences,Institute of Information Engineering, Chinese Academy of Sciences School of Cyber Security,Beijing,China"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences,Institute of Information Engineering, Chinese Academy of Sciences School of Cyber Security,Beijing,China","institution_ids":["https://openalex.org/I4210156404","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002479762","display_name":"Jiahao Lyu","orcid":"https://orcid.org/0000-0001-6788-3942"},"institutions":[{"id":"https://openalex.org/I4210156404","display_name":"Institute of Information Engineering","ror":"https://ror.org/04r53se39","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210156404"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiahao Lyu","raw_affiliation_strings":["University of Chinese Academy of Sciences,Institute of Information Engineering, Chinese Academy of Sciences School of Cyber Security,Beijing,China"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences,Institute of Information Engineering, Chinese Academy of Sciences School of Cyber Security,Beijing,China","institution_ids":["https://openalex.org/I4210156404","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054948401","display_name":"Daiqing Wu","orcid":"https://orcid.org/0009-0000-2320-387X"},"institutions":[{"id":"https://openalex.org/I4210156404","display_name":"Institute of Information Engineering","ror":"https://ror.org/04r53se39","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210156404"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Daiqing Wu","raw_affiliation_strings":["University of Chinese Academy of Sciences,Institute of Information Engineering, Chinese Academy of Sciences School of Cyber Security,Beijing,China"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences,Institute of Information Engineering, Chinese Academy of Sciences School of Cyber Security,Beijing,China","institution_ids":["https://openalex.org/I4210156404","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017766479","display_name":"Huawen Shen","orcid":"https://orcid.org/0000-0001-8192-4552"},"institutions":[{"id":"https://openalex.org/I4210156404","display_name":"Institute of Information Engineering","ror":"https://ror.org/04r53se39","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210156404"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huawen Shen","raw_affiliation_strings":["University of Chinese Academy of Sciences,Institute of Information Engineering, Chinese Academy of Sciences School of Cyber Security,Beijing,China"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences,Institute of Information Engineering, Chinese Academy of Sciences School of Cyber Security,Beijing,China","institution_ids":["https://openalex.org/I4210156404","https://openalex.org/I4210165038"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5016175345","display_name":"Yu Zhou","orcid":"https://orcid.org/0000-0003-4188-9953"},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Zhou","raw_affiliation_strings":["Nankai University,VCIP &amp; TMCC &amp; DISSec College of Computer Science,Tianjin,China"],"affiliations":[{"raw_affiliation_string":"Nankai University,VCIP &amp; TMCC &amp; DISSec College of Computer Science,Tianjin,China","institution_ids":["https://openalex.org/I205237279"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5041031140"],"corresponding_institution_ids":["https://openalex.org/I4210156404","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.02003605,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9785000085830688,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9785000085830688,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9771999716758728,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9344000220298767,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.7879724502563477},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6695353388786316},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6593351364135742},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6167023181915283},{"id":"https://openalex.org/keywords/char","display_name":"Char","score":0.500725269317627},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4851287305355072},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.47503119707107544},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.4216857850551605},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.37658777832984924},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09944146871566772},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09885391592979431},{"id":"https://openalex.org/keywords/geometry","display_name":"Geometry","score":0.09156197309494019}],"concepts":[{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.7879724502563477},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6695353388786316},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6593351364135742},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6167023181915283},{"id":"https://openalex.org/C2779970684","wikidata":"https://www.wikidata.org/wiki/Q11567121","display_name":"Char","level":3,"score":0.500725269317627},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4851287305355072},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.47503119707107544},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.4216857850551605},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.37658777832984924},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09944146871566772},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09885391592979431},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.09156197309494019},{"id":"https://openalex.org/C36759035","wikidata":"https://www.wikidata.org/wiki/Q176848","display_name":"Pyrolysis","level":2,"score":0.0},{"id":"https://openalex.org/C548081761","wikidata":"https://www.wikidata.org/wiki/Q180388","display_name":"Waste management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49660.2025.10888943","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10888943","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W2343052201","https://openalex.org/W2412782625","https://openalex.org/W2560023338","https://openalex.org/W2612775908","https://openalex.org/W2785383245","https://openalex.org/W2963299604","https://openalex.org/W2967615747","https://openalex.org/W2972678073","https://openalex.org/W2982148195","https://openalex.org/W2991090032","https://openalex.org/W3003921261","https://openalex.org/W3014641072","https://openalex.org/W3034447740","https://openalex.org/W3168428721","https://openalex.org/W3178292378","https://openalex.org/W3179897446","https://openalex.org/W3206651063","https://openalex.org/W4304084026","https://openalex.org/W4317677707","https://openalex.org/W4323892169","https://openalex.org/W4385767964","https://openalex.org/W4387968066","https://openalex.org/W4387968277","https://openalex.org/W4390874575","https://openalex.org/W4391021462","https://openalex.org/W4391109864","https://openalex.org/W4403488523","https://openalex.org/W4404239097","https://openalex.org/W4405093242","https://openalex.org/W4408634392","https://openalex.org/W4409366302","https://openalex.org/W4409367174","https://openalex.org/W4409367990","https://openalex.org/W6691603626","https://openalex.org/W6784333009","https://openalex.org/W6873495549"],"related_works":["https://openalex.org/W3100008673","https://openalex.org/W3014196776","https://openalex.org/W4365804372","https://openalex.org/W2361493043","https://openalex.org/W2387569949","https://openalex.org/W3175631101","https://openalex.org/W4231241302","https://openalex.org/W2059524743","https://openalex.org/W2742148006","https://openalex.org/W1522196789"],"abstract_inverted_index":{"The":[0,35],"recent":[1],"emergence":[2],"of":[3,142,153,169,178],"the":[4,47,104,125,150,167,176],"Segment":[5],"Anything":[6],"Model":[7],"(SAM)":[8],"enables":[9,175],"various":[10],"domain-specific":[11],"segmentation":[12,76,138,159,182],"tasks":[13],"to":[14,116,131,155],"be":[15],"tackled":[16],"cost-effectively":[17],"by":[18],"using":[19,103],"bounding":[20,37,49,92,100],"boxes":[21],"as":[22,39,51,120],"prompts.":[23],"However,":[24],"in":[25,124,134],"scene":[26,180],"text":[27,87,117,158,181],"segmentation,":[28],"SAM":[29,72,133,154],"can":[30],"not":[31],"achieve":[32],"desirable":[33],"performance.":[34],"word-level":[36,91],"box":[38,50,93,101],"prompts":[40,52,102],"is":[41],"too":[42],"coarse":[43],"for":[44],"characters,":[45],"while":[46],"character-level":[48,99],"suffers":[53],"from":[54,184],"over-segmentation":[55,143],"and":[56,144,189],"under-segmentation":[57],"issues.":[58],"In":[59],"this":[60],"paper,":[61],"we":[62,95,111],"propose":[63],"an":[64],"automatic":[65],"annotation":[66],"pipeline":[67],"named":[68],"Char-SAM,":[69],"that":[70],"turns":[71],"into":[73],"a":[74,79,121],"low-cost":[75],"annotator":[77],"with":[78,90],"Character-level":[80],"visual":[81],"prompt.":[82],"Specifically,":[83],"leveraging":[84],"some":[85],"existing":[86],"detection":[88],"datasets":[89,183,186],"annotations,":[94],"first":[96],"generate":[97,156],"finer-grained":[98],"Character":[105,126],"Bounding-box":[106],"Refinement":[107,128],"(CBR)":[108],"module.":[109],"Next,":[110],"employ":[112],"glyph":[113],"information":[114],"corresponding":[115],"character":[118],"categories":[119],"new":[122],"prompt":[123],"Glyph":[127],"(CGR)":[129],"module":[130],"guide":[132],"producing":[135],"more":[136],"accurate":[137],"masks,":[139],"addressing":[140],"issues":[141],"under-segmentation.":[145],"These":[146],"modules":[147],"fully":[148],"utilize":[149],"bbox-to-mask":[151],"capability":[152],"high-quality":[157,179],"annotations":[160],"automatically.":[161],"Extensive":[162],"experiments":[163],"on":[164],"TextSeg":[165],"validate":[166],"effectiveness":[168],"Char-SAM.":[170],"Its":[171],"training-free":[172],"nature":[173],"also":[174],"generation":[177],"real-world":[185],"like":[187],"COCO-Text":[188],"MLT17.":[190]},"counts_by_year":[],"updated_date":"2025-12-28T23:10:05.387466","created_date":"2025-10-10T00:00:00"}
