{"id":"https://openalex.org/W2012322694","doi":"https://doi.org/10.1142/s0218843014410019","title":"A Perspective of Evolution After Five Years: A Large-Scale Study of Web Spam Evolution","display_name":"A Perspective of Evolution After Five Years: A Large-Scale Study of Web Spam Evolution","publication_year":2014,"publication_date":"2014-06-01","ids":{"openalex":"https://openalex.org/W2012322694","doi":"https://doi.org/10.1142/s0218843014410019","mag":"2012322694"},"language":"en","primary_location":{"id":"doi:10.1142/s0218843014410019","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0218843014410019","pdf_url":null,"source":{"id":"https://openalex.org/S17147961","display_name":"International Journal of Cooperative Information Systems","issn_l":"0218-8430","issn":["0218-8430","1793-6365"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Cooperative Information Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102730633","display_name":"Wang De","orcid":null},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"De Wang","raw_affiliation_strings":["College of Computing, Georgia Institute of Technology, Atlanta, Georgia 30332-0765, United States"],"affiliations":[{"raw_affiliation_string":"College of Computing, Georgia Institute of Technology, Atlanta, Georgia 30332-0765, United States","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011425625","display_name":"Danesh Irani","orcid":null},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Danesh Irani","raw_affiliation_strings":["College of Computing, Georgia Institute of Technology, Atlanta, Georgia 30332-0765, United States"],"affiliations":[{"raw_affiliation_string":"College of Computing, Georgia Institute of Technology, Atlanta, Georgia 30332-0765, United States","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5081442372","display_name":"Calton Pu","orcid":"https://orcid.org/0000-0002-6616-8987"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Calton Pu","raw_affiliation_strings":["College of Computing, Georgia Institute of Technology, Atlanta, Georgia 30332-0765, United States"],"affiliations":[{"raw_affiliation_string":"College of Computing, Georgia Institute of Technology, Atlanta, Georgia 30332-0765, United States","institution_ids":["https://openalex.org/I130701444"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5102730633"],"corresponding_institution_ids":["https://openalex.org/I130701444"],"apc_list":null,"apc_paid":null,"fwci":3.1557,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.92501178,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"23","issue":"02","first_page":"1441001","last_page":"1441001"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spambot","display_name":"Spambot","score":0.8358813524246216},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8291487097740173},{"id":"https://openalex.org/keywords/forum-spam","display_name":"Forum spam","score":0.7609456777572632},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.7507803440093994},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.6272979378700256},{"id":"https://openalex.org/keywords/spamming","display_name":"Spamming","score":0.6256483197212219},{"id":"https://openalex.org/keywords/spamdexing","display_name":"Spamdexing","score":0.531701922416687},{"id":"https://openalex.org/keywords/web-crawler","display_name":"Web crawler","score":0.49506059288978577},{"id":"https://openalex.org/keywords/social-media","display_name":"Social media","score":0.457913339138031},{"id":"https://openalex.org/keywords/web-development","display_name":"Web development","score":0.4069618582725525},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.342237651348114},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.3402528762817383},{"id":"https://openalex.org/keywords/web-search-engine","display_name":"Web search engine","score":0.28566575050354004}],"concepts":[{"id":"https://openalex.org/C127735637","wikidata":"https://www.wikidata.org/wiki/Q2306702","display_name":"Spambot","level":4,"score":0.8358813524246216},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8291487097740173},{"id":"https://openalex.org/C157310412","wikidata":"https://www.wikidata.org/wiki/Q3140915","display_name":"Forum spam","level":5,"score":0.7609456777572632},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.7507803440093994},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.6272979378700256},{"id":"https://openalex.org/C158955206","wikidata":"https://www.wikidata.org/wiki/Q83058","display_name":"Spamming","level":3,"score":0.6256483197212219},{"id":"https://openalex.org/C13565553","wikidata":"https://www.wikidata.org/wiki/Q804206","display_name":"Spamdexing","level":5,"score":0.531701922416687},{"id":"https://openalex.org/C13743948","wikidata":"https://www.wikidata.org/wiki/Q45842","display_name":"Web crawler","level":2,"score":0.49506059288978577},{"id":"https://openalex.org/C518677369","wikidata":"https://www.wikidata.org/wiki/Q202833","display_name":"Social media","level":2,"score":0.457913339138031},{"id":"https://openalex.org/C79373723","wikidata":"https://www.wikidata.org/wiki/Q386275","display_name":"Web development","level":3,"score":0.4069618582725525},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.342237651348114},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.3402528762817383},{"id":"https://openalex.org/C521815418","wikidata":"https://www.wikidata.org/wiki/Q4182287","display_name":"Web search engine","level":4,"score":0.28566575050354004}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1142/s0218843014410019","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0218843014410019","pdf_url":null,"source":{"id":"https://openalex.org/S17147961","display_name":"International Journal of Cooperative Information Systems","issn_l":"0218-8430","issn":["0218-8430","1793-6365"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Cooperative Information Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320308861","display_name":"Woods Hole Oceanographic Institution","ror":"https://ror.org/03zbnzt98"},{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"},{"id":"https://openalex.org/F4320337388","display_name":"Division of Computer and Network Systems","ror":"https://ror.org/02rdzmk74"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W202878612","https://openalex.org/W1845137714","https://openalex.org/W1924689489","https://openalex.org/W1965555277","https://openalex.org/W2027954012","https://openalex.org/W2050028965","https://openalex.org/W2103333826","https://openalex.org/W2112400959","https://openalex.org/W2133990480","https://openalex.org/W2137168471","https://openalex.org/W2138245071","https://openalex.org/W2148238780","https://openalex.org/W2165612380","https://openalex.org/W2169070925","https://openalex.org/W2169270715","https://openalex.org/W2435251607"],"related_works":["https://openalex.org/W2080521416","https://openalex.org/W2091208042","https://openalex.org/W2018000323","https://openalex.org/W2920666515","https://openalex.org/W2738482239","https://openalex.org/W2246912952","https://openalex.org/W2552201931","https://openalex.org/W1551975479","https://openalex.org/W3098355271","https://openalex.org/W2103881442"],"abstract_inverted_index":{"Identifying":[0],"and":[1,11,55,125,161,197,207],"detecting":[2],"web":[3,24,31,57,64,76,80,99,135,158,171],"spam":[4,56,77,79,98,129,145,172],"is":[5,42],"an":[6],"ongoing":[7],"battle":[8],"between":[9],"spam-researchers":[10,41],"spammers":[12,187,204],"which":[13,102],"has":[14],"been":[15],"going":[16],"on":[17,49,123,216],"since":[18,173],"search":[19],"engines":[20],"allowed":[21],"searching":[22],"of":[23,30,53,62,95,128,151],"pages":[25,65,100,136],"to":[26,68,106,211],"the":[27,43,70,87,109,174,213],"modern":[28],"sharing":[29],"links":[32,139],"via":[33],"social":[34,189],"networks.":[35],"A":[36],"common":[37],"challenge":[38],"faced":[39],"by":[40],"fact":[44],"that":[45],"new":[46,209],"techniques":[47,210],"depend":[48],"requiring":[50],"a":[51,93,115],"corpus":[52,94,117,133,153],"legitimate":[54,63],"pages.":[58,81],"Although":[59],"large":[60],"corpora":[61],"are":[66],"available":[67,105],"researchers,":[69],"same":[71],"cannot":[72],"be":[73],"said":[74],"about":[75],"or":[78],"In":[82],"this":[83,152],"paper,":[84],"we":[85,103],"introduce":[86],"Webb":[88,176],"Spam":[89,177],"Corpus":[90,178],"2011":[91],"\u2014":[92,101],"approximately":[96],"330,000":[97],"make":[104],"researchers":[107,119],"in":[108,141,170,181,191],"fight":[110],"against":[111],"spam.":[112],"By":[113],"having":[114],"standard":[116],"available,":[118],"can":[120],"collaborate":[121],"better":[122],"developing":[124],"reporting":[126],"results":[127],"filtering":[130],"techniques.":[131],"The":[132],"contains":[134],"crawled":[137],"from":[138],"found":[140],"over":[142,201],"6.3":[143],"million":[144],"emails.":[146],"We":[147,164],"analyze":[148],"multiple":[149],"aspects":[150],"including":[154],"redirection,":[155],"HTTP":[156,195,217],"headers,":[157],"page":[159],"content,":[160],"classification":[162],"evaluation.":[163],"also":[165,199],"provide":[166],"insights":[167,184],"into":[168],"changes":[169],"last":[175],"was":[179],"released":[180],"2006.":[182],"These":[183],"include:":[185],"(1)":[186],"manipulate":[188],"media":[190],"spreading":[192],"spam;":[193],"(2)":[194],"headers":[196],"content":[198],"change":[200],"time;":[202],"(3)":[203],"have":[205],"evolved":[206],"adopted":[208],"avoid":[212],"detection":[214],"based":[215],"header":[218],"information.":[219]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2015,"cited_by_count":2},{"year":2014,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
