{"id":"https://openalex.org/W2913694443","doi":"https://doi.org/10.1109/bigdata.2018.8622547","title":"Phishing URL Detection with Oversampling based on Text Generative Adversarial Networks","display_name":"Phishing URL Detection with Oversampling based on Text Generative Adversarial Networks","publication_year":2018,"publication_date":"2018-12-01","ids":{"openalex":"https://openalex.org/W2913694443","doi":"https://doi.org/10.1109/bigdata.2018.8622547","mag":"2913694443"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata.2018.8622547","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2018.8622547","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5110698003","display_name":"Ankesh Anand","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ankesh Anand","raw_affiliation_strings":["Montreal Institute for Learning Algorithms, Montreal, QC, Canada"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Montreal Institute for Learning Algorithms, Montreal, QC, Canada","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008133675","display_name":"Kshitij Gorde","orcid":null},"institutions":[{"id":"https://openalex.org/I102149020","display_name":"University of North Carolina at Charlotte","ror":"https://ror.org/04dawnj30","country_code":"US","type":"education","lineage":["https://openalex.org/I102149020"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kshitij Gorde","raw_affiliation_strings":["University of North Carolina, Charlotte, NC, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of North Carolina, Charlotte, NC, USA","institution_ids":["https://openalex.org/I102149020"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000244424","display_name":"Joel Ruben Antony Moniz","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Joel Ruben Antony Moniz","raw_affiliation_strings":["Carnegie Mellon University, Pittsburgh, PA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067253588","display_name":"Noseong Park","orcid":"https://orcid.org/0000-0002-1268-840X"},"institutions":[{"id":"https://openalex.org/I162714631","display_name":"George Mason University","ror":"https://ror.org/02jqj7156","country_code":"US","type":"education","lineage":["https://openalex.org/I162714631"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Noseong Park","raw_affiliation_strings":["George Mason University, Fairfax, VA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"George Mason University, Fairfax, VA, USA","institution_ids":["https://openalex.org/I162714631"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046521217","display_name":"Tanmoy Chakraborty","orcid":"https://orcid.org/0000-0002-0210-0369"},"institutions":[{"id":"https://openalex.org/I119939252","display_name":"Indraprastha Institute of Information Technology Delhi","ror":"https://ror.org/03vfp4g33","country_code":"IN","type":"education","lineage":["https://openalex.org/I119939252"]},{"id":"https://openalex.org/I68891433","display_name":"Indian Institute of Technology Delhi","ror":"https://ror.org/049tgcd06","country_code":"IN","type":"education","lineage":["https://openalex.org/I68891433"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Tanmoy Chakraborty","raw_affiliation_strings":["IIIT-Delhi, New Delhi, India"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IIIT-Delhi, New Delhi, India","institution_ids":["https://openalex.org/I119939252","https://openalex.org/I68891433"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5111734614","display_name":"Bei-Tseng Chu","orcid":null},"institutions":[{"id":"https://openalex.org/I102149020","display_name":"University of North Carolina at Charlotte","ror":"https://ror.org/04dawnj30","country_code":"US","type":"education","lineage":["https://openalex.org/I102149020"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bei-Tseng Chu","raw_affiliation_strings":["University of North Carolina, Charlotte, NC, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of North Carolina, Charlotte, NC, USA","institution_ids":["https://openalex.org/I102149020"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5110698003"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":11.1031,"has_fulltext":false,"cited_by_count":56,"citation_normalized_percentile":{"value":0.98407066,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1168","last_page":"1177"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9921000003814697,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/oversampling","display_name":"Oversampling","score":0.947306752204895},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7779733538627625},{"id":"https://openalex.org/keywords/phishing","display_name":"Phishing","score":0.7714953422546387},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.7110052704811096},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6122573614120483},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.6054366230964661},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5325120091438293},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5059608817100525},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4808581471443176},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4220700263977051},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.36110883951187134},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3347565829753876},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.19700780510902405},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.06971266865730286},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.06705978512763977}],"concepts":[{"id":"https://openalex.org/C197323446","wikidata":"https://www.wikidata.org/wiki/Q331222","display_name":"Oversampling","level":3,"score":0.947306752204895},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7779733538627625},{"id":"https://openalex.org/C83860907","wikidata":"https://www.wikidata.org/wiki/Q135005","display_name":"Phishing","level":3,"score":0.7714953422546387},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.7110052704811096},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6122573614120483},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.6054366230964661},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5325120091438293},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5059608817100525},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4808581471443176},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4220700263977051},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.36110883951187134},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3347565829753876},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.19700780510902405},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.06971266865730286},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.06705978512763977},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata.2018.8622547","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2018.8622547","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W1582036582","https://openalex.org/W1930835045","https://openalex.org/W1983305208","https://openalex.org/W1987971958","https://openalex.org/W1989957782","https://openalex.org/W2012481173","https://openalex.org/W2029470356","https://openalex.org/W2064675550","https://openalex.org/W2099471712","https://openalex.org/W2104933073","https://openalex.org/W2121990650","https://openalex.org/W2132791018","https://openalex.org/W2146729596","https://openalex.org/W2147203050","https://openalex.org/W2148143831","https://openalex.org/W2156838815","https://openalex.org/W2168508521","https://openalex.org/W2295731716","https://openalex.org/W2547875792","https://openalex.org/W2625935159","https://openalex.org/W2950151997","https://openalex.org/W2962879692","https://openalex.org/W2964201867","https://openalex.org/W2964268978","https://openalex.org/W4295521014","https://openalex.org/W4320013936","https://openalex.org/W6640284647","https://openalex.org/W6679745481","https://openalex.org/W6696870837","https://openalex.org/W6729448088","https://openalex.org/W6732249622","https://openalex.org/W6735913928","https://openalex.org/W6779669310"],"related_works":["https://openalex.org/W2766503024","https://openalex.org/W2781247653","https://openalex.org/W4206637278","https://openalex.org/W4386005305","https://openalex.org/W4386214543","https://openalex.org/W3082051559","https://openalex.org/W1969988626","https://openalex.org/W1682621979","https://openalex.org/W2141301039","https://openalex.org/W2619203976"],"abstract_inverted_index":{"The":[0],"problem":[1],"of":[2,36,50,149,181],"imbalanced":[3],"classes":[4],"arises":[5],"frequently":[6],"in":[7,78,123,136],"binary":[8],"classification":[9],"tasks.":[10],"If":[11],"one":[12],"class":[13,61,139],"outnumbers":[14],"another,":[15],"trained":[16],"classifiers":[17],"become":[18],"heavily":[19],"biased":[20],"towards":[21],"the":[22,34,41,48,55,59,75,79,83,107,124,137,150,175,182,190],"majority":[23,42],"class.":[24],"For":[25],"phishing":[26],"URL":[27,157],"detection,":[28],"it":[29],"is":[30,44,87,109],"very":[31],"natural":[32],"that":[33,144],"number":[35,49],"collected":[37,51],"benign":[38,165],"URLs":[39,53,90,135,143,185],"(i.e.,":[40,54],"class)":[43],"much":[45],"larger":[46],"than":[47],"phishy":[52,163],"minority":[56,60,138],"class).":[57],"Oversampling":[58],"can":[62,145],"be":[63,146],"a":[64,155],"powerful":[65],"tool":[66],"to":[67,159],"overcome":[68],"this":[69,116],"situation.":[70],"However,":[71],"existing":[72],"methods":[73,97],"perform":[74],"oversampling":[76,121,177],"task":[77],"feature":[80,102],"space":[81],"where":[82],"original":[84,183],"data":[85,125],"format":[86],"removed":[88],"and":[89,106,111,140,164],"are":[91,98,104,186],"succinctly":[92],"represented":[93],"by":[94,189],"vectors.":[95],"These":[96],"successful":[99],"only":[100],"if":[101],"definitions":[103],"correct":[105],"dataset":[108],"diverse":[110],"not":[112],"too":[113],"sparse.":[114],"In":[115],"paper,":[117],"we":[118],"propose":[119],"an":[120],"technique":[122],"space.":[126],"We":[127,153],"train":[128],"text":[129,192],"generative":[130,193],"adversarial":[131],"networks":[132],"(text-GANs)":[133],"with":[134],"generate":[141],"synthetic":[142],"made":[147],"part":[148],"training":[151],"set.":[152],"crawl":[154],"crowd-sourced":[156],"repository":[158],"collect":[160],"recently":[161],"discovered":[162],"URLs.":[166],"Our":[167],"experiments":[168],"demonstrate":[169],"significant":[170],"performance":[171],"improvements":[172],"after":[173],"using":[174],"proposed":[176,191],"technique.":[178],"Interestingly,":[179],"some":[180],"test":[184],"exactly":[187],"regenerated":[188],"model.":[194]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":10},{"year":2022,"cited_by_count":11},{"year":2021,"cited_by_count":14},{"year":2020,"cited_by_count":9},{"year":2019,"cited_by_count":4},{"year":2017,"cited_by_count":1}],"updated_date":"2026-05-28T09:10:13.091523","created_date":"2025-10-10T00:00:00"}
