{"id":"https://openalex.org/W2114715970","doi":"https://doi.org/10.1145/1244408.1244417","title":"A large-scale study of link spam detection by graph algorithms","display_name":"A large-scale study of link spam detection by graph algorithms","publication_year":2007,"publication_date":"2007-05-08","ids":{"openalex":"https://openalex.org/W2114715970","doi":"https://doi.org/10.1145/1244408.1244417","mag":"2114715970"},"language":"en","primary_location":{"id":"doi:10.1145/1244408.1244417","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1244408.1244417","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 3rd international workshop on Adversarial information retrieval on the web","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5027370006","display_name":"Hiroo Saito","orcid":null},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Hiroo Saito","raw_affiliation_strings":["Aihara Complexity Modelling Project, ERATO, JST, Tokyo, Japan and University of Tokyo, Tokyo, Japan","Aihara Complexity Modelling Project, ERATO, JST, Tokyo, Japan and University of Tokyo, Tokyo, Japan#TAB#"],"affiliations":[{"raw_affiliation_string":"Aihara Complexity Modelling Project, ERATO, JST, Tokyo, Japan and University of Tokyo, Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]},{"raw_affiliation_string":"Aihara Complexity Modelling Project, ERATO, JST, Tokyo, Japan and University of Tokyo, Tokyo, Japan#TAB#","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005790090","display_name":"Masashi Toyoda","orcid":"https://orcid.org/0000-0001-9473-5531"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Masashi Toyoda","raw_affiliation_strings":["University of Tokyo, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"University of Tokyo, Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056438865","display_name":"Masaru Kitsuregawa","orcid":"https://orcid.org/0000-0003-4027-2994"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Masaru Kitsuregawa","raw_affiliation_strings":["University of Tokyo, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"University of Tokyo, Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038078845","display_name":"Kazuyuki Aihara","orcid":"https://orcid.org/0000-0002-4602-9816"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kazuyuki Aihara","raw_affiliation_strings":["Aihara Complexity Modelling Project, ERATO, JST, Tokyo, Japan and University of Tokyo, Tokyo, Japan","Aihara Complexity Modelling Project, ERATO, JST, Tokyo, Japan and University of Tokyo, Tokyo, Japan#TAB#"],"affiliations":[{"raw_affiliation_string":"Aihara Complexity Modelling Project, ERATO, JST, Tokyo, Japan and University of Tokyo, Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]},{"raw_affiliation_string":"Aihara Complexity Modelling Project, ERATO, JST, Tokyo, Japan and University of Tokyo, Tokyo, Japan#TAB#","institution_ids":["https://openalex.org/I74801974"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5027370006"],"corresponding_institution_ids":["https://openalex.org/I74801974"],"apc_list":null,"apc_paid":null,"fwci":16.5663,"has_fulltext":false,"cited_by_count":66,"citation_normalized_percentile":{"value":0.98836371,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"45","last_page":"48"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10064","display_name":"Complex Network Analysis Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7363892793655396},{"id":"https://openalex.org/keywords/link","display_name":"Link (geometry)","score":0.5045000314712524},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.4648180603981018},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.4251115322113037},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.38035473227500916},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.29260459542274475},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.10385558009147644}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7363892793655396},{"id":"https://openalex.org/C2778753846","wikidata":"https://www.wikidata.org/wiki/Q6554239","display_name":"Link (geometry)","level":2,"score":0.5045000314712524},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.4648180603981018},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.4251115322113037},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.38035473227500916},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.29260459542274475},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.10385558009147644},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/1244408.1244417","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1244408.1244417","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 3rd international workshop on Adversarial information retrieval on the web","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.91.2566","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.91.2566","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://airweb.cse.lehigh.edu/2007/papers/paper_125.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W202878612","https://openalex.org/W1529464143","https://openalex.org/W1674850363","https://openalex.org/W1845137714","https://openalex.org/W1854214752","https://openalex.org/W1868111879","https://openalex.org/W1984374364","https://openalex.org/W2007687650","https://openalex.org/W2107428549","https://openalex.org/W2108671511","https://openalex.org/W2113417702","https://openalex.org/W2122505515","https://openalex.org/W2130854091","https://openalex.org/W2138621811","https://openalex.org/W2139148100","https://openalex.org/W2169270715","https://openalex.org/W2175110005","https://openalex.org/W2337420946","https://openalex.org/W6639055396","https://openalex.org/W6676897750","https://openalex.org/W6678180042"],"related_works":["https://openalex.org/W3013034866","https://openalex.org/W2386767533","https://openalex.org/W4248042515","https://openalex.org/W3217390384","https://openalex.org/W2059283639","https://openalex.org/W2152407399","https://openalex.org/W318894253","https://openalex.org/W2391817034","https://openalex.org/W4385964823","https://openalex.org/W1846629211"],"abstract_inverted_index":{"Link":[0],"spam":[1,67,115,138,150,159],"refers":[2],"to":[3,5,74,113],"attempts":[4],"promote":[6],"the":[7,40,53,66,75,79,90,95,98,118,164,181,184,190],"ranking":[8,17],"of":[9,29,45,52,97,105,109,126],"spammers":[10],"\u2019":[11],"web":[12,76,80],"sites":[13,30,59,116,160,174],"by":[14,141,183,189],"deceiving":[15],"link-based":[16],"algorithms":[18,73],"in":[19,48,94,117,161,180],"search":[20],"engines.":[21],"Spammers":[22],"often":[23],"create":[24],"densely":[25],"connected":[26,86],"link":[27,46,110,127,133],"structure":[28,42],"so":[31],"called":[32],"\u201clink":[33],"farm\u201d.":[34],"In":[35],"this":[36],"paper,":[37],"we":[38,69,100,120,130],"study":[39],"overall":[41],"and":[43,60,151,166,171,188],"distribution":[44],"farms":[47,134],"a":[49,136,142],"large-scale":[50],"graph":[51,72,81],"Japanese":[54],"Web":[55],"with":[56,177],"5.8":[57],"million":[58,62,158],"283":[61],"links.":[63],"To":[64],"examine":[65],"structure,":[68],"apply":[70],"three":[71],"graph.":[77],"First,":[78],"is":[82],"decomposed":[83],"into":[84],"strongly":[85],"components":[87,107],"(SCC).":[88],"Beside":[89],"largest":[91],"SCC":[92],"(core)":[93],"center":[96],"web,":[99],"have":[101],"observed":[102],"that":[103,146],"most":[104],"large":[106],"consist":[108],"farms.":[111,128],"Next,":[112],"extract":[114],"core,":[119,165],"enumerate":[121],"maximal":[122,185],"cliques":[123],"as":[124,135,175],"seeds":[125],"Finally,":[129],"expand":[131],"these":[132],"reliable":[137],"seed":[139],"set":[140],"minimum":[143,191],"cut":[144,192],"technique":[145],"separates":[147],"links":[148],"among":[149],"non-spam":[152],"sites.":[153],"We":[154],"found":[155],"about":[156],"0.6":[157],"SCCs":[162],"around":[163],"extracted":[167],"additional":[168],"8":[169],"thousand":[170,173],"49":[172],"spams":[176],"high":[178],"precision":[179],"core":[182],"clique":[186],"enumeration":[187],"technique,":[193],"respectively.":[194],"1.":[195]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":8},{"year":2016,"cited_by_count":4},{"year":2015,"cited_by_count":4},{"year":2014,"cited_by_count":3},{"year":2013,"cited_by_count":3},{"year":2012,"cited_by_count":6}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
