{"id":"https://openalex.org/W4413907239","doi":"https://doi.org/10.32604/cmc.2025.068156","title":"OCR-Assisted Masked BERT for Homoglyph Restoration towards Multiple Phishing Text Downstream Tasks","display_name":"OCR-Assisted Masked BERT for Homoglyph Restoration towards Multiple Phishing Text Downstream Tasks","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4413907239","doi":"https://doi.org/10.32604/cmc.2025.068156"},"language":"en","primary_location":{"id":"doi:10.32604/cmc.2025.068156","is_oa":true,"landing_page_url":"https://doi.org/10.32604/cmc.2025.068156","pdf_url":null,"source":{"id":"https://openalex.org/S4210191605","display_name":"Computers, materials & continua/Computers, materials & continua (Print)","issn_l":"1546-2218","issn":["1546-2218","1546-2226"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computers, Materials &amp; Continua","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.32604/cmc.2025.068156","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Hanyong Lee","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Hanyong Lee","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Ye-Chan Park","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ye-Chan Park","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100744043","display_name":"Jaesung Lee","orcid":"https://orcid.org/0000-0002-3757-3510"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jaesung Lee","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.1783,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.82485498,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"85","issue":"3","first_page":"4977","last_page":"4993"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9878000020980835,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9843999743461609,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/downstream","display_name":"Downstream (manufacturing)","score":0.7595733404159546},{"id":"https://openalex.org/keywords/phishing","display_name":"Phishing","score":0.6163997054100037},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6020355224609375},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.38236236572265625},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3476906716823578},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.30112677812576294},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.17541995644569397},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.14891088008880615},{"id":"https://openalex.org/keywords/operations-management","display_name":"Operations management","score":0.06977865099906921}],"concepts":[{"id":"https://openalex.org/C2776207758","wikidata":"https://www.wikidata.org/wiki/Q5303302","display_name":"Downstream (manufacturing)","level":2,"score":0.7595733404159546},{"id":"https://openalex.org/C83860907","wikidata":"https://www.wikidata.org/wiki/Q135005","display_name":"Phishing","level":3,"score":0.6163997054100037},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6020355224609375},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.38236236572265625},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3476906716823578},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.30112677812576294},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.17541995644569397},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.14891088008880615},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.06977865099906921}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.32604/cmc.2025.068156","is_oa":true,"landing_page_url":"https://doi.org/10.32604/cmc.2025.068156","pdf_url":null,"source":{"id":"https://openalex.org/S4210191605","display_name":"Computers, materials & continua/Computers, materials & continua (Print)","issn_l":"1546-2218","issn":["1546-2218","1546-2226"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computers, Materials &amp; Continua","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.32604/cmc.2025.068156","is_oa":true,"landing_page_url":"https://doi.org/10.32604/cmc.2025.068156","pdf_url":null,"source":{"id":"https://openalex.org/S4210191605","display_name":"Computers, materials & continua/Computers, materials & continua (Print)","issn_l":"1546-2218","issn":["1546-2218","1546-2226"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computers, Materials &amp; Continua","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":4,"referenced_works":["https://openalex.org/W2972760196","https://openalex.org/W4225858632","https://openalex.org/W4296351595","https://openalex.org/W4406055949"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2149202530","https://openalex.org/W2807822918","https://openalex.org/W2921723332","https://openalex.org/W4391093354","https://openalex.org/W2482950156","https://openalex.org/W4396966040","https://openalex.org/W2305322260","https://openalex.org/W3139248031","https://openalex.org/W3042334625"],"abstract_inverted_index":{"Restoring":[0],"texts":[1],"corrupted":[2],"by":[3,127],"visually":[4,27,102,239],"perturbed":[5,103],"homoglyph":[6,39,203],"characters":[7,24,104,130],"presents":[8],"significant":[9],"challenges":[10],"to":[11,20,42,60,122,168,212],"conventional":[12],"Natural":[13],"Language":[14],"Processing":[15],"(NLP)":[16],"systems,":[17],"primarily":[18],"due":[19,41],"ambiguities":[21,126],"arising":[22],"from":[23,78],"that":[25,145],"appear":[26],"similar":[28],"yet":[29],"differ":[30],"semantically.":[31],"Traditional":[32],"text":[33,86,241],"restoration":[34,152,164],"methods":[35],"struggle":[36],"with":[37,93,227],"these":[38,65],"perturbations":[40],"limitations":[43],"such":[44,194],"as":[45,195],"a":[46,94,162,232],"lack":[47],"of":[48,166,207,249],"contextual":[49,120,229],"understanding":[50],"and":[51,157,182,202,218,234,247],"difficulty":[52],"in":[53,170,179,252],"handling":[54],"cases":[55],"where":[56,98],"one":[57],"character":[58],"maps":[59],"multiple":[61,191],"candidates.":[62],"To":[63],"address":[64],"issues,":[66],"we":[67],"propose":[68],"an":[69],"Optical":[70],"Character":[71],"Recognition":[72],"(OCR)-assisted":[73],"masked":[74],"Bidirectional":[75],"Encoder":[76],"Representations":[77],"Transformers":[79],"(BERT)":[80],"model":[81],"specifically":[82],"designed":[83],"for":[84,237],"homoglyph-perturbed":[85],"restoration.":[87],"Our":[88,220],"method":[89,148],"integrates":[90],"OCR":[91,99,225],"preprocessing":[92,100,226],"character-level":[95,116,228],"BERT":[96,117],"architecture,":[97],"transforms":[101],"into":[105],"their":[106],"approximate":[107],"alphabetic":[108],"equivalents,":[109],"significantly":[110,149],"reducing":[111],"multi-correspondence":[112],"ambiguities.":[113],"Subsequently,":[114],"the":[115,146,215,245],"leverages":[118],"bidirectional":[119],"information":[121],"accurately":[123],"resolve":[124],"remaining":[125],"predicting":[128],"intended":[129],"based":[131],"on":[132,139],"surrounding":[133],"semantic":[134],"cues.":[135],"Extensive":[136],"experiments":[137],"conducted":[138],"realistic":[140],"phishing":[141],"email":[142],"datasets":[143],"demonstrate":[144],"proposed":[147,221],"outperforms":[150],"existing":[151],"techniques,":[153],"including":[154],"OCR-based,":[155],"dictionary-based,":[156],"traditional":[158],"BERT-based":[159],"approaches,":[160],"achieving":[161],"word-level":[163],"accuracy":[165],"up":[167],"99.59%":[169],"fine-tuned":[171],"settings.":[172],"Additionally,":[173],"our":[174],"approach":[175],"exhibits":[176],"robust":[177],"performance":[178],"zero-shot":[180],"scenarios":[181],"maintains":[183],"effectiveness":[184],"under":[185,205],"low-resource":[186],"conditions.":[187],"Further":[188],"evaluations":[189],"across":[190],"downstream":[192],"tasks,":[193],"part-of-speech":[196],"tagging,":[197],"chunking,":[198],"toxic":[199],"comment":[200],"classification,":[201],"detection":[204],"conditions":[206],"severe":[208],"visual":[209],"perturbation":[210],"(up":[211],"40%),":[213],"confirm":[214],"method\u2019s":[216],"generalizability":[217],"applicability.":[219],"hybrid":[222],"approach,":[223],"combining":[224],"modeling,":[230],"represents":[231],"scalable":[233],"practical":[235],"solution":[236],"mitigating":[238],"adversarial":[240],"attacks,":[242],"thereby":[243],"enhancing":[244],"security":[246],"reliability":[248],"NLP":[250],"systems":[251],"real-world":[253],"applications.":[254]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-17T18:11:37.981687","created_date":"2025-10-10T00:00:00"}
