{"id":"https://openalex.org/W4414170001","doi":"https://doi.org/10.1109/avss65446.2025.11149886","title":"From Diffusion to Decision: A Diffusion-ReRanking in Scene Text Detection","display_name":"From Diffusion to Decision: A Diffusion-ReRanking in Scene Text Detection","publication_year":2025,"publication_date":"2025-08-11","ids":{"openalex":"https://openalex.org/W4414170001","doi":"https://doi.org/10.1109/avss65446.2025.11149886"},"language":"en","primary_location":{"id":"doi:10.1109/avss65446.2025.11149886","is_oa":false,"landing_page_url":"https://doi.org/10.1109/avss65446.2025.11149886","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Advanced Visual and Signal-Based Systems (AVSS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025737507","display_name":"Yong Jia","orcid":"https://orcid.org/0000-0003-3165-3349"},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Jia Ying Yong","raw_affiliation_strings":["National Yang Ming Chiao Tung University"],"affiliations":[{"raw_affiliation_string":"National Yang Ming Chiao Tung University","institution_ids":["https://openalex.org/I148366613"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040050806","display_name":"Hong-Han Shuai","orcid":"https://orcid.org/0000-0003-2216-077X"},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Hong-Han Shuai","raw_affiliation_strings":["National Yang Ming Chiao Tung University"],"affiliations":[{"raw_affiliation_string":"National Yang Ming Chiao Tung University","institution_ids":["https://openalex.org/I148366613"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013026466","display_name":"Hongxia Xie","orcid":"https://orcid.org/0000-0002-5652-4327"},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]},{"id":"https://openalex.org/I4210136497","display_name":"Jilin Medical University","ror":"https://ror.org/03mzw7781","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210136497"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongxia Xie","raw_affiliation_strings":["Jilin University"],"affiliations":[{"raw_affiliation_string":"Jilin University","institution_ids":["https://openalex.org/I4210136497","https://openalex.org/I194450716"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048205934","display_name":"Yung\u2010Hui Li","orcid":"https://orcid.org/0000-0002-0475-3689"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yung-Hui Li","raw_affiliation_strings":["Hon Hai Research Institute"],"affiliations":[{"raw_affiliation_string":"Hon Hai Research Institute","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5037186260","display_name":"Hao\u2010Wen Cheng","orcid":"https://orcid.org/0000-0003-1940-7962"},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Wen-Huang Cheng","raw_affiliation_strings":["National Taiwan University"],"affiliations":[{"raw_affiliation_string":"National Taiwan University","institution_ids":["https://openalex.org/I16733864"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5025737507"],"corresponding_institution_ids":["https://openalex.org/I148366613"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.25407028,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.9943000078201294,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/minimum-bounding-box","display_name":"Minimum bounding box","score":0.7339000105857849},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6962000131607056},{"id":"https://openalex.org/keywords/bounding-overwatch","display_name":"Bounding overwatch","score":0.6812999844551086},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5335999727249146},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.5232999920845032},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.48429998755455017},{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.47679999470710754}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7459999918937683},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7376999855041504},{"id":"https://openalex.org/C147037132","wikidata":"https://www.wikidata.org/wiki/Q6865426","display_name":"Minimum bounding box","level":3,"score":0.7339000105857849},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6962000131607056},{"id":"https://openalex.org/C63584917","wikidata":"https://www.wikidata.org/wiki/Q333286","display_name":"Bounding overwatch","level":2,"score":0.6812999844551086},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5605999827384949},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5335999727249146},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.5232999920845032},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.48429998755455017},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.47679999470710754},{"id":"https://openalex.org/C57493831","wikidata":"https://www.wikidata.org/wiki/Q3134666","display_name":"Projection (relational algebra)","level":2,"score":0.4691999852657318},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.439300000667572},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.37959998846054077},{"id":"https://openalex.org/C2983589003","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Text detection","level":3,"score":0.3790999948978424},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.34360000491142273},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.2953999936580658},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.2809999883174896},{"id":"https://openalex.org/C100660578","wikidata":"https://www.wikidata.org/wiki/Q18733","display_name":"Recall","level":2,"score":0.2799000144004822}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/avss65446.2025.11149886","is_oa":false,"landing_page_url":"https://doi.org/10.1109/avss65446.2025.11149886","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Advanced Visual and Signal-Based Systems (AVSS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W1488125194","https://openalex.org/W2008806374","https://openalex.org/W2194775991","https://openalex.org/W2217433794","https://openalex.org/W2565639579","https://openalex.org/W2785383245","https://openalex.org/W2914492226","https://openalex.org/W2963150697","https://openalex.org/W2963299604","https://openalex.org/W3035679705","https://openalex.org/W3106546328","https://openalex.org/W3180169285","https://openalex.org/W3196976036","https://openalex.org/W4214922754","https://openalex.org/W4313172891","https://openalex.org/W4366824582","https://openalex.org/W4382464147","https://openalex.org/W4390873752","https://openalex.org/W4392931179","https://openalex.org/W4402774414"],"related_works":[],"abstract_inverted_index":{"Diffusion":[0],"models":[1],"have":[2],"recently":[3],"shown":[4],"great":[5],"potential":[6,144],"in":[7,93],"object":[8],"detection":[9],"and":[10,28,70,90,101,104,117,135],"instance":[11,26,47,54],"segmentation,":[12],"yet":[13],"their":[14],"application":[15],"to":[16,97,145],"scene":[17,50,147],"text":[18,51,148],"detection,":[19],"with":[20,84],"its":[21],"unique":[22],"challenges":[23],"such":[24],"as":[25],"variability":[27],"subjective":[29],"human":[30],"annotations,":[31],"remains":[32],"unexplored.":[33],"In":[34],"this":[35,75],"paper,":[36],"we":[37],"propose":[38],"DRR":[39,73],"(Diffusion":[40],"ReRanking),":[41],"a":[42,80,85,105,128],"method":[43],"that":[44],"adapts":[45],"diffusion-based":[46],"segmentation":[48,55],"for":[49,61],"detection.":[52,149],"Traditional":[53],"often":[56],"relies":[57],"on":[58,140],"classification":[59],"scores":[60],"ranking,":[62],"potentially":[63],"overlooking":[64],"the":[65,94,110,123],"accuracy":[66,116],"of":[67,87,125,130,133,138],"bounding":[68,114],"boxes":[69],"mask":[71,95,118],"quality.":[72,119],"addresses":[74],"by":[76,112],"incorporating":[77],"two":[78],"networks:":[79],"diffusion":[81],"network,":[82,107],"trained":[83],"combination":[86],"projection":[88],"loss":[89,92],"pairwise":[91],"branch":[96],"produce":[98],"more":[99],"precise":[100],"tightly-bound":[102],"segmentations,":[103],"reranking":[106],"which":[108],"refines":[109],"results":[111],"evaluating":[113],"box":[115],"Extensive":[120],"experiments":[121],"demonstrate":[122],"effectiveness":[124],"DRR,":[126],"achieving":[127],"precision":[129],"86.7%,":[131],"recall":[132],"81.7%,":[134],"an":[136],"F-measure":[137],"84.1%":[139],"CTW1500,":[141],"highlighting":[142],"DRR\u2019s":[143],"advance":[146]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
