{"id":"https://openalex.org/W2027116318","doi":"https://doi.org/10.1145/1531914.1531916","title":"Looking into the past to better classify web spam","display_name":"Looking into the past to better classify web spam","publication_year":2009,"publication_date":"2009-04-21","ids":{"openalex":"https://openalex.org/W2027116318","doi":"https://doi.org/10.1145/1531914.1531916","mag":"2027116318"},"language":"en","primary_location":{"id":"doi:10.1145/1531914.1531916","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1531914.1531916","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 5th International Workshop on Adversarial Information Retrieval on the Web","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030163252","display_name":"Na Dai","orcid":"https://orcid.org/0000-0002-3223-7737"},"institutions":[{"id":"https://openalex.org/I186143895","display_name":"Lehigh University","ror":"https://ror.org/012afjb06","country_code":"US","type":"education","lineage":["https://openalex.org/I186143895"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Na Dai","raw_affiliation_strings":["Lehigh University, Bethlehem, PA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Lehigh University, Bethlehem, PA","institution_ids":["https://openalex.org/I186143895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042328810","display_name":"Brian D. Davison","orcid":"https://orcid.org/0000-0002-9326-3648"},"institutions":[{"id":"https://openalex.org/I186143895","display_name":"Lehigh University","ror":"https://ror.org/012afjb06","country_code":"US","type":"education","lineage":["https://openalex.org/I186143895"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Brian D. Davison","raw_affiliation_strings":["Lehigh University, Bethlehem, PA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Lehigh University, Bethlehem, PA","institution_ids":["https://openalex.org/I186143895"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5024068814","display_name":"Xiaoguang Qi","orcid":"https://orcid.org/0000-0002-3465-9482"},"institutions":[{"id":"https://openalex.org/I186143895","display_name":"Lehigh University","ror":"https://ror.org/012afjb06","country_code":"US","type":"education","lineage":["https://openalex.org/I186143895"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiaoguang Qi","raw_affiliation_strings":["Lehigh University, Bethlehem, PA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Lehigh University, Bethlehem, PA","institution_ids":["https://openalex.org/I186143895"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":13.9758,"has_fulltext":false,"cited_by_count":47,"citation_normalized_percentile":{"value":0.98601594,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6976489424705505},{"id":"https://openalex.org/keywords/spambot","display_name":"Spambot","score":0.5483064651489258},{"id":"https://openalex.org/keywords/forum-spam","display_name":"Forum spam","score":0.47268015146255493},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.4201655387878418},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.32864075899124146},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.2839244604110718},{"id":"https://openalex.org/keywords/spamming","display_name":"Spamming","score":0.26853567361831665}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6976489424705505},{"id":"https://openalex.org/C127735637","wikidata":"https://www.wikidata.org/wiki/Q2306702","display_name":"Spambot","level":4,"score":0.5483064651489258},{"id":"https://openalex.org/C157310412","wikidata":"https://www.wikidata.org/wiki/Q3140915","display_name":"Forum spam","level":5,"score":0.47268015146255493},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.4201655387878418},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.32864075899124146},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.2839244604110718},{"id":"https://openalex.org/C158955206","wikidata":"https://www.wikidata.org/wiki/Q83058","display_name":"Spamming","level":3,"score":0.26853567361831665}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/1531914.1531916","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1531914.1531916","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 5th International Workshop on Adversarial Information Retrieval on the Web","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.151.8296","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.151.8296","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cse.lehigh.edu/~brian/pubs/2009/AIRWeb/looking-into-past.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W202878612","https://openalex.org/W1570448133","https://openalex.org/W1574862351","https://openalex.org/W1845137714","https://openalex.org/W2014415866","https://openalex.org/W2053377618","https://openalex.org/W2066055909","https://openalex.org/W2099789730","https://openalex.org/W2102232904","https://openalex.org/W2107428549","https://openalex.org/W2116196348","https://openalex.org/W2118942057","https://openalex.org/W2132605240","https://openalex.org/W2138323616","https://openalex.org/W2139148100","https://openalex.org/W2140204390","https://openalex.org/W2141243388","https://openalex.org/W2146859361","https://openalex.org/W2156632103","https://openalex.org/W2160952429","https://openalex.org/W2184111937","https://openalex.org/W2561832073","https://openalex.org/W2966207845","https://openalex.org/W6608203075","https://openalex.org/W6683392976"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2148238780","https://openalex.org/W321853934","https://openalex.org/W2012322694","https://openalex.org/W2068696370","https://openalex.org/W2163764145","https://openalex.org/W2738826738","https://openalex.org/W2133365643","https://openalex.org/W1547781172","https://openalex.org/W2504351161"],"abstract_inverted_index":{"Web":[0],"spamming":[1],"techniques":[2,68],"aim":[3],"to":[4,60,69,100],"achieve":[5],"undeserved":[6],"rankings":[7],"in":[8,44],"search":[9],"results.":[10],"Research":[11],"has":[12],"been":[13],"widely":[14],"conducted":[15],"on":[16,73,80,84],"identifying":[17],"such":[18],"spam":[19,26,45,62,93],"and":[20],"neutralizing":[21],"its":[22],"influence.":[23],"However,":[24],"existing":[25],"detection":[27],"work":[28],"only":[29,105],"considers":[30,106],"current":[31,74,107],"information.":[32],"We":[33,64],"argue":[34],"that":[35,89],"historical":[36,55],"web":[37,58],"page":[38,75,108],"information":[39],"may":[40],"also":[41],"be":[42],"important":[43],"classification.":[46,63],"In":[47],"this":[48],"paper,":[49],"we":[50],"use":[51,65],"content":[52,76],"features":[53],"from":[54],"versions":[56],"of":[57],"pages":[59],"improve":[61],"supervised":[66],"learning":[67],"combine":[70],"classifiers":[71,78],"based":[72,79],"with":[77],"temporal":[81],"features.":[82],"Experiments":[83],"the":[85],"WEBSPAM-UK2007":[86],"dataset":[87],"show":[88],"our":[90],"approach":[91],"improves":[92],"classification":[94],"F-measure":[95],"performance":[96],"by":[97],"30%":[98],"compared":[99],"a":[101],"baseline":[102],"classifier":[103],"which":[104],"content.":[109]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":4},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":3},{"year":2014,"cited_by_count":9},{"year":2013,"cited_by_count":6},{"year":2012,"cited_by_count":5}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
