{"id":"https://openalex.org/W3092227219","doi":"https://doi.org/10.1109/isi49825.2020.9280538","title":"Weaponizing Unicodes with Deep Learning -Identifying Homoglyphs with Weakly Labeled Data","display_name":"Weaponizing Unicodes with Deep Learning -Identifying Homoglyphs with Weakly Labeled Data","publication_year":2020,"publication_date":"2020-11-09","ids":{"openalex":"https://openalex.org/W3092227219","doi":"https://doi.org/10.1109/isi49825.2020.9280538","mag":"3092227219"},"language":"en","primary_location":{"id":"doi:10.1109/isi49825.2020.9280538","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isi49825.2020.9280538","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Intelligence and Security Informatics (ISI)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2010.04382","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Perry Deng","orcid":null},"institutions":[{"id":"https://openalex.org/I155173764","display_name":"Rochester Institute of Technology","ror":"https://ror.org/00v4yb702","country_code":"US","type":"education","lineage":["https://openalex.org/I155173764"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Perry Deng","raw_affiliation_strings":["Global Cybersecurity Institute, Rochester Institute of Technology, Rochester, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Global Cybersecurity Institute, Rochester Institute of Technology, Rochester, USA","institution_ids":["https://openalex.org/I155173764"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Cooper Linsky","orcid":null},"institutions":[{"id":"https://openalex.org/I155173764","display_name":"Rochester Institute of Technology","ror":"https://ror.org/00v4yb702","country_code":"US","type":"education","lineage":["https://openalex.org/I155173764"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Cooper Linsky","raw_affiliation_strings":["Global Cybersecurity Institute, Rochester Institute of Technology, Rochester, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Global Cybersecurity Institute, Rochester Institute of Technology, Rochester, USA","institution_ids":["https://openalex.org/I155173764"]}]},{"author_position":"last","author":{"id":null,"display_name":"Matthew Wright","orcid":null},"institutions":[{"id":"https://openalex.org/I155173764","display_name":"Rochester Institute of Technology","ror":"https://ror.org/00v4yb702","country_code":"US","type":"education","lineage":["https://openalex.org/I155173764"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Matthew Wright","raw_affiliation_strings":["Global Cybersecurity Institute, Rochester Institute of Technology, Rochester, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Global Cybersecurity Institute, Rochester Institute of Technology, Rochester, USA","institution_ids":["https://openalex.org/I155173764"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.1958,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.50848916,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"12","issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12357","display_name":"Digital Media Forensic Detection","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12357","display_name":"Digital Media Forensic Detection","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9865999817848206,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.694599986076355},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.673799991607666},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5827000141143799},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5529999732971191},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.4422000050544739},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.4390999972820282},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.43470001220703125},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4178999960422516},{"id":"https://openalex.org/keywords/equivalence","display_name":"Equivalence (formal languages)","score":0.41609999537467957}],"concepts":[{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.694599986076355},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.673799991607666},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6427000164985657},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.637499988079071},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5827000141143799},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5529999732971191},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.4422000050544739},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.4390999972820282},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.43470001220703125},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4178999960422516},{"id":"https://openalex.org/C2780069185","wikidata":"https://www.wikidata.org/wiki/Q7977945","display_name":"Equivalence (formal languages)","level":2,"score":0.41609999537467957},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.413100004196167},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.39489999413490295},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3901999890804291},{"id":"https://openalex.org/C157486923","wikidata":"https://www.wikidata.org/wiki/Q1376436","display_name":"String (physics)","level":2,"score":0.38449999690055847},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3749000132083893},{"id":"https://openalex.org/C71901391","wikidata":"https://www.wikidata.org/wiki/Q7126699","display_name":"Upload","level":2,"score":0.35339999198913574},{"id":"https://openalex.org/C22820288","wikidata":"https://www.wikidata.org/wiki/Q9050568","display_name":"String metric","level":4,"score":0.34850001335144043},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3476000130176544},{"id":"https://openalex.org/C44359876","wikidata":"https://www.wikidata.org/wiki/Q5338467","display_name":"Edit distance","level":2,"score":0.3061999976634979},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.29089999198913574},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.28630000352859497},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.2847000062465668},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2824000120162964},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.27790001034736633},{"id":"https://openalex.org/C739882","wikidata":"https://www.wikidata.org/wiki/Q3560506","display_name":"Anomaly detection","level":2,"score":0.2750000059604645},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2741999924182892},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2556000053882599},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.2540000081062317}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/isi49825.2020.9280538","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isi49825.2020.9280538","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Intelligence and Security Informatics (ISI)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2010.04382","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2010.04382","pdf_url":"https://arxiv.org/pdf/2010.04382","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2010.04382","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2010.04382","pdf_url":"https://arxiv.org/pdf/2010.04382","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W42110184","https://openalex.org/W2083055907","https://openalex.org/W2096733369","https://openalex.org/W2194321275","https://openalex.org/W2807459182","https://openalex.org/W2963635116","https://openalex.org/W2972760196","https://openalex.org/W2997420602","https://openalex.org/W3034942609","https://openalex.org/W6762718338"],"related_works":[],"abstract_inverted_index":{"Visually":[0],"similar":[1],"characters,":[2],"or":[3,13,142],"homoglyphs,":[4,188],"can":[5],"be":[6,199],"used":[7],"to":[8,14,24,31,61,138,143,171,182,211],"perform":[9],"social":[10],"engineering":[11],"attacks":[12],"evade":[15],"spam":[16],"and":[17,44,59,68,189,204],"plagiarism":[18],"detectors.":[19],"It":[20],"is":[21,129],"thus":[22],"important":[23],"understand":[25],"the":[26,63,84,96,117,159,174],"capabilities":[27],"of":[28,66,78,112,125,196,206],"an":[29,109],"attacker":[30],"identify":[32,70],"homoglyphs":[33,122,141,208],"-":[34,43],"particularly":[35],"ones":[36],"that":[37,81,86,194],"have":[38],"not":[39,90],"been":[40],"previously":[41,186],"spotted":[42],"leverage":[45],"them":[46],"in":[47],"attacks.":[48],"We":[49,114,177],"investigate":[50],"a":[51,154],"deep-learning":[52],"model":[53,93,181],"using":[54],"embedding":[55],"learning,":[56,58],"transfer":[57],"augmentation":[60],"determine":[62],"visual":[64],"similarity":[65],"characters":[67,88],"thereby":[69],"potential":[71],"homoglyphs.":[72,91],"Our":[73,92,164],"approach":[74,100],"uniquely":[75],"takes":[76],"advantage":[77],"weak":[79],"labels":[80],"arise":[82],"from":[83],"fact":[85],"most":[87],"are":[89,209],"drastically":[94],"outperforms":[95],"Normal-ized":[97],"Compression":[98],"Distance":[99],"on":[101,158],"pairwise":[102,133],"homoglyph":[103],"identification,":[104],"for":[105,135,173],"which":[106,128],"we":[107,152],"achieve":[108],"average":[110],"precision":[111],"0.97.":[113],"also":[115,178],"present":[116],"first":[118],"attempt":[119],"at":[120],"clustering":[121,150,165],"into":[123],"sets":[124],"equivalence":[126],"classes,":[127],"more":[130],"efficient":[131],"than":[132],"information":[134],"security":[136],"practitioners":[137],"quickly":[139],"lookup":[140],"normalize":[144],"confusable":[145],"string":[146],"encodings.":[147],"To":[148],"measure":[149],"performance,":[151],"propose":[153],"metric":[155],"(mBIOU)":[156],"building":[157],"classic":[160],"Intersection-Over-Union":[161],"(IOU)":[162],"metric.":[163],"method":[166],"achieves":[167],"0.592":[168],"mBIOU,":[169],"compared":[170],"0.430":[172],"naive":[175],"baseline.":[176],"use":[179],"our":[180],"predict":[183],"over":[184],"8,000":[185],"unknown":[187],"find":[190],"good":[191],"early":[192],"indications":[193],"many":[195],"these":[197],"may":[198],"true":[200],"positives.":[201],"Source":[202],"code":[203],"list":[205],"predicted":[207],"uploaded":[210],"Github:":[212],"https://github.com/PerryXDeng/weaponizing_unicode.":[213]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2020-10-15T00:00:00"}
