{"id":"https://openalex.org/W4399477596","doi":"https://doi.org/10.1145/3626232.3653283","title":"WikiPhish: A Diverse Wikipedia-Based Dataset for Phishing Website Detection: Data/Toolset Paper","display_name":"WikiPhish: A Diverse Wikipedia-Based Dataset for Phishing Website Detection: Data/Toolset Paper","publication_year":2024,"publication_date":"2024-06-10","ids":{"openalex":"https://openalex.org/W4399477596","doi":"https://doi.org/10.1145/3626232.3653283"},"language":"en","primary_location":{"id":"doi:10.1145/3626232.3653283","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3626232.3653283","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Fourteenth ACM Conference on Data and Application Security and Privacy","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5099069842","display_name":"Gabriel Loiseau","orcid":"https://orcid.org/0009-0008-0160-0979"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Gabriel Loiseau","raw_affiliation_strings":["Hornet Security, Hem, France"],"affiliations":[{"raw_affiliation_string":"Hornet Security, Hem, France","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5099069843","display_name":"Valentin Lefils","orcid":"https://orcid.org/0009-0006-3012-1657"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Valentin Lefils","raw_affiliation_strings":["Hornet Security, Hem, France"],"affiliations":[{"raw_affiliation_string":"Hornet Security, Hem, France","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089236281","display_name":"Maxime Meyer","orcid":"https://orcid.org/0000-0002-8652-597X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Maxime Meyer","raw_affiliation_strings":["Hornet Security, Hem, France"],"affiliations":[{"raw_affiliation_string":"Hornet Security, Hem, France","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5013683743","display_name":"Damien Riquet","orcid":"https://orcid.org/0009-0004-9835-9878"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Damien Riquet","raw_affiliation_strings":["Hornet Security, Hem, France"],"affiliations":[{"raw_affiliation_string":"Hornet Security, Hem, France","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5099069842"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.5588,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.85895461,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"361","last_page":"366"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11147","display_name":"Misinformation and Its Impacts","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9761000275611877,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/phishing","display_name":"Phishing","score":0.923896312713623},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8110114336013794},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.4763215482234955},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.45075318217277527},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.14485999941825867}],"concepts":[{"id":"https://openalex.org/C83860907","wikidata":"https://www.wikidata.org/wiki/Q135005","display_name":"Phishing","level":3,"score":0.923896312713623},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8110114336013794},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.4763215482234955},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.45075318217277527},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.14485999941825867}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3626232.3653283","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3626232.3653283","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Fourteenth ACM Conference on Data and Application Security and Privacy","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.46000000834465027,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W2070896531","https://openalex.org/W2157155774","https://openalex.org/W2169557227","https://openalex.org/W2521519773","https://openalex.org/W2806944993","https://openalex.org/W2898017895","https://openalex.org/W2902942389","https://openalex.org/W2988668989","https://openalex.org/W3011774524","https://openalex.org/W3157510103","https://openalex.org/W3195189229","https://openalex.org/W3195446130","https://openalex.org/W4225454120","https://openalex.org/W4311413020"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2149202530","https://openalex.org/W2807822918","https://openalex.org/W2921723332","https://openalex.org/W2482950156","https://openalex.org/W2305322260","https://openalex.org/W3139248031","https://openalex.org/W3042334625","https://openalex.org/W4200017362"],"abstract_inverted_index":{"Phishing":[0],"remains":[1],"a":[2,58,76,127],"pervasive":[3],"security":[4,38],"threat,":[5],"necessitating":[6],"effective":[7],"and":[8,32,53,79,98,104,122,130,151,164],"universally":[9],"comparable":[10],"detection":[11,22,35,116,136,168],"systems.":[12,39],"The":[13,106],"use":[14],"of":[15,37,46,65,88,114,142,156,166],"supervised":[16],"machine":[17,169],"learning":[18,170],"models":[19,41],"for":[20,49,82,133,146],"phishing":[21,70,83,101,115,135,148,167],"has":[23],"been":[24],"generalized":[25],"in":[26,161],"the":[27,34,99,112,140,153,162],"literature":[28],"to":[29,60,68,110],"automate":[30],"predictions":[31],"increase":[33],"capacities":[36],"These":[40],"rely":[42],"on":[43],"large":[44],"amounts":[45],"annotated":[47,66],"data":[48,67],"their":[50],"training,":[51],"evaluation":[52,165],"maintenance.":[54],"Thus,":[55],"there":[56],"is":[57,108],"need":[59],"efficiently":[61],"collect":[62],"significant":[63],"amount":[64],"improve":[69],"detection.":[71],"This":[72],"paper":[73],"introduces":[74],"WikiPhish,":[75],"novel,":[77],"renewable,":[78],"open-access":[80],"dataset":[81,107],"website":[84,149],"classification.":[85],"It":[86],"consists":[87],"110,606":[89],"webpages":[90],"harvested":[91],"from":[92,95],"URLs":[93,145],"drawn":[94],"Wikipedia's":[96,119],"references":[97],"popular":[100],"databases":[102],"OpenPhish":[103],"PhishTank.":[105],"designed":[109],"address":[111],"challenges":[113],"by":[117,158],"leveraging":[118],"contribution":[120],"verification":[121],"wide-ranging":[123],"content.":[124],"WikiPhish":[125,157],"offers":[126],"more":[128],"diverse":[129,144],"robust":[131],"baseline":[132],"developing":[134],"models.":[137,171],"We":[138],"highlight":[139],"importance":[141],"gathering":[143],"building":[147],"datasets,":[150],"demonstrate":[152],"practical":[154],"utility":[155],"employing":[159],"it":[160],"training":[163]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
