{"id":"https://openalex.org/W1973990666","doi":"https://doi.org/10.1109/bigdata.2013.6691627","title":"Malicious URL filtering &amp;#x2014; A big data application","display_name":"Malicious URL filtering &amp;#x2014; A big data application","publication_year":2013,"publication_date":"2013-10-01","ids":{"openalex":"https://openalex.org/W1973990666","doi":"https://doi.org/10.1109/bigdata.2013.6691627","mag":"1973990666"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata.2013.6691627","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2013.6691627","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2013 IEEE International Conference on Big Data","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102148083","display_name":"Min-Sheng Lin","orcid":null},"institutions":[{"id":"https://openalex.org/I154864474","display_name":"National Taiwan University of Science and Technology","ror":"https://ror.org/00q09pe49","country_code":"TW","type":"education","lineage":["https://openalex.org/I154864474"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Min-Sheng Lin","raw_affiliation_strings":["Dept. of Computer Science and Information Engineering, National Taiwan Univ. of Science and Technology, Taipei, Taiwan","Department of Computer Science and Information Engineering, National Taiwan University of Science and Technology, Taipei, Taiwan.#TAB#"],"affiliations":[{"raw_affiliation_string":"Dept. of Computer Science and Information Engineering, National Taiwan Univ. of Science and Technology, Taipei, Taiwan","institution_ids":["https://openalex.org/I154864474"]},{"raw_affiliation_string":"Department of Computer Science and Information Engineering, National Taiwan University of Science and Technology, Taipei, Taiwan.#TAB#","institution_ids":["https://openalex.org/I154864474"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077249286","display_name":"Chien-Yi Chiu","orcid":null},"institutions":[{"id":"https://openalex.org/I154864474","display_name":"National Taiwan University of Science and Technology","ror":"https://ror.org/00q09pe49","country_code":"TW","type":"education","lineage":["https://openalex.org/I154864474"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Chien-Yi Chiu","raw_affiliation_strings":["Dept. of Computer Science and Information Engineering, National Taiwan Univ. of Science and Technology, Taipei, Taiwan","Department of Computer Science and Information Engineering, National Taiwan University of Science and Technology, Taipei, Taiwan.#TAB#"],"affiliations":[{"raw_affiliation_string":"Dept. of Computer Science and Information Engineering, National Taiwan Univ. of Science and Technology, Taipei, Taiwan","institution_ids":["https://openalex.org/I154864474"]},{"raw_affiliation_string":"Department of Computer Science and Information Engineering, National Taiwan University of Science and Technology, Taipei, Taiwan.#TAB#","institution_ids":["https://openalex.org/I154864474"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038395989","display_name":"Yuh\u2010Jye Lee","orcid":null},"institutions":[{"id":"https://openalex.org/I154864474","display_name":"National Taiwan University of Science and Technology","ror":"https://ror.org/00q09pe49","country_code":"TW","type":"education","lineage":["https://openalex.org/I154864474"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Yuh-Jye Lee","raw_affiliation_strings":["Dept. of Computer Science and Information Engineering, National Taiwan Univ. of Science and Technology, Taipei, Taiwan","Department of Computer Science and Information Engineering, National Taiwan University of Science and Technology, Taipei, Taiwan.#TAB#"],"affiliations":[{"raw_affiliation_string":"Dept. of Computer Science and Information Engineering, National Taiwan Univ. of Science and Technology, Taipei, Taiwan","institution_ids":["https://openalex.org/I154864474"]},{"raw_affiliation_string":"Department of Computer Science and Information Engineering, National Taiwan University of Science and Technology, Taipei, Taiwan.#TAB#","institution_ids":["https://openalex.org/I154864474"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5030897009","display_name":"Hsing-Kuo Pao","orcid":"https://orcid.org/0000-0002-5518-9475"},"institutions":[{"id":"https://openalex.org/I154864474","display_name":"National Taiwan University of Science and Technology","ror":"https://ror.org/00q09pe49","country_code":"TW","type":"education","lineage":["https://openalex.org/I154864474"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Hsing-Kuo Pao","raw_affiliation_strings":["Dept. of Computer Science and Information Engineering, National Taiwan Univ. of Science and Technology, Taipei, Taiwan","Department of Computer Science and Information Engineering, National Taiwan University of Science and Technology, Taipei, Taiwan.#TAB#"],"affiliations":[{"raw_affiliation_string":"Dept. of Computer Science and Information Engineering, National Taiwan Univ. of Science and Technology, Taipei, Taiwan","institution_ids":["https://openalex.org/I154864474"]},{"raw_affiliation_string":"Department of Computer Science and Information Engineering, National Taiwan University of Science and Technology, Taipei, Taiwan.#TAB#","institution_ids":["https://openalex.org/I154864474"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5102148083"],"corresponding_institution_ids":["https://openalex.org/I154864474"],"apc_list":null,"apc_paid":null,"fwci":8.898,"has_fulltext":false,"cited_by_count":45,"citation_normalized_percentile":{"value":0.972416,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"589","last_page":"596"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10400","display_name":"Network Security and Intrusion Detection","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9922000169754028,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8641470670700073},{"id":"https://openalex.org/keywords/spamming","display_name":"Spamming","score":0.7774815559387207},{"id":"https://openalex.org/keywords/phishing","display_name":"Phishing","score":0.7200175523757935},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.5591080784797668},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.5365056991577148},{"id":"https://openalex.org/keywords/volume","display_name":"Volume (thermodynamics)","score":0.47390949726104736},{"id":"https://openalex.org/keywords/workload","display_name":"Workload","score":0.46265631914138794},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.45704275369644165},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.4532346725463867},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3716164231300354},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.26231539249420166},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.1670130491256714},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.14842623472213745}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8641470670700073},{"id":"https://openalex.org/C158955206","wikidata":"https://www.wikidata.org/wiki/Q83058","display_name":"Spamming","level":3,"score":0.7774815559387207},{"id":"https://openalex.org/C83860907","wikidata":"https://www.wikidata.org/wiki/Q135005","display_name":"Phishing","level":3,"score":0.7200175523757935},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.5591080784797668},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.5365056991577148},{"id":"https://openalex.org/C20556612","wikidata":"https://www.wikidata.org/wiki/Q4469374","display_name":"Volume (thermodynamics)","level":2,"score":0.47390949726104736},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.46265631914138794},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.45704275369644165},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.4532346725463867},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3716164231300354},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.26231539249420166},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.1670130491256714},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.14842623472213745},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata.2013.6691627","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2013.6691627","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2013 IEEE International Conference on Big Data","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.7900000214576721,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320307102","display_name":"Intel Corporation","ror":"https://ror.org/01ek73717"},{"id":"https://openalex.org/F4320321040","display_name":"National Science Council","ror":"https://ror.org/02kv4zf79"},{"id":"https://openalex.org/F4320323900","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W1985683032","https://openalex.org/W2012481173","https://openalex.org/W2020645183","https://openalex.org/W2023973750","https://openalex.org/W2053774863","https://openalex.org/W2102475112","https://openalex.org/W2121990650","https://openalex.org/W2122537498","https://openalex.org/W2160218441","https://openalex.org/W2163764145","https://openalex.org/W2295731716","https://openalex.org/W3009009611","https://openalex.org/W4251917148","https://openalex.org/W4285719527","https://openalex.org/W6683584131","https://openalex.org/W6696870837"],"related_works":["https://openalex.org/W1987732684","https://openalex.org/W2286465138","https://openalex.org/W637393809","https://openalex.org/W2808742736","https://openalex.org/W1526983901","https://openalex.org/W3126526144","https://openalex.org/W3021299372","https://openalex.org/W2492910442","https://openalex.org/W2255926860","https://openalex.org/W2901744059"],"abstract_inverted_index":{"Malicious":[0],"URLs":[1,22,97,121],"have":[2],"become":[3],"a":[4,45,66,71,105],"channel":[5],"for":[6,17,154,162,197],"Internet":[7,37],"criminal":[8],"activities":[9],"such":[10],"as":[11,101],"drive-by-download,":[12],"spamming":[13],"and":[14,69,136,139,194],"phishing.":[15],"Applications":[16],"the":[18,32,52,92,102,142,164,177],"detection":[19],"of":[20,82,94,96,169,179],"malicious":[21,83,170],"are":[23,149],"accurate":[24],"but":[25,160],"slow":[26],"(because":[27],"they":[28],"need":[29],"to":[30,56,116,187],"download":[31],"content":[33,198],"or":[34],"query":[35],"some":[36],"host":[38],"information).":[39],"In":[40],"this":[41],"paper":[42],"we":[43],"present":[44],"novel":[46],"lightweight":[47],"filter":[48,173],"based":[49],"only":[50,153],"on":[51,65,182],"URL":[53,180],"string":[54],"itself":[55],"use":[57],"before":[58],"existing":[59],"processing":[60],"methods.":[61],"We":[62,127],"run":[63],"experiments":[64],"large":[67],"dataset":[68],"demonstrate":[70],"75%":[72],"reduction":[73],"in":[74,122],"workload":[75],"size":[76],"while":[77],"retaining":[78],"at":[79],"least":[80],"90%":[81],"URLs.":[84,171],"Existing":[85],"methods":[86],"do":[87],"not":[88,152],"scale":[89],"well":[90],"with":[91,156],"hundreds":[93],"millions":[95],"encountered":[98],"every":[99],"day":[100],"problem":[103],"is":[104,114],"heavily-imbalanced,":[106],"large-scale":[107,157],"binary":[108],"classification":[109],"problem.":[110],"Our":[111,172],"proposed":[112],"method":[113],"able":[115],"handle":[117],"nearly":[118],"two":[119,129],"million":[120],"less":[123],"than":[124],"five":[125],"minutes.":[126],"generate":[128],"filtering":[130,143],"models":[131],"by":[132],"using":[133],"lexical":[134],"features":[135],"descriptive":[137],"features,":[138],"then":[140],"combine":[141],"results.":[144],"The":[145],"on-line":[146],"learning":[147],"algorithms":[148],"applied":[150],"here":[151],"dealing":[155],"data":[158],"sets":[159],"also":[161],"fitting":[163],"very":[165],"short":[166],"lifetime":[167],"characteristics":[168],"can":[174],"significantly":[175],"reduce":[176],"volume":[178],"queries":[181],"which":[183],"further":[184],"analysis":[185],"needs":[186],"be":[188],"performed,":[189],"saving":[190],"both":[191],"computing":[192],"time":[193],"bandwidth":[195],"used":[196],"retrieval.":[199]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":7},{"year":2018,"cited_by_count":4},{"year":2017,"cited_by_count":5},{"year":2016,"cited_by_count":5},{"year":2015,"cited_by_count":2},{"year":2014,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
