{"id":"https://openalex.org/W2168479832","doi":"https://doi.org/10.1145/1772690.1772742","title":"Large-scale bot detection for search engines","display_name":"Large-scale bot detection for search engines","publication_year":2010,"publication_date":"2010-04-26","ids":{"openalex":"https://openalex.org/W2168479832","doi":"https://doi.org/10.1145/1772690.1772742","mag":"2168479832"},"language":"en","primary_location":{"id":"doi:10.1145/1772690.1772742","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1772690.1772742","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 19th international conference on World wide web","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5085862383","display_name":"Hongwen Kang","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Hongwen Kang","raw_affiliation_strings":["Carnegie Mellon University, Pittsburgh, PA, USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041659067","display_name":"Kuansan Wang","orcid":"https://orcid.org/0000-0001-7089-7966"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kuansan Wang","raw_affiliation_strings":["Microsoft Research, Redmond, WA, USA"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034476019","display_name":"David Soukal","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David Soukal","raw_affiliation_strings":["Microsoft, Redmond, WA, USA"],"affiliations":[{"raw_affiliation_string":"Microsoft, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056139416","display_name":"Fritz Behr","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Fritz Behr","raw_affiliation_strings":["Microsoft, Redmond, WA, USA"],"affiliations":[{"raw_affiliation_string":"Microsoft, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5078418486","display_name":"Zijian Zheng","orcid":"https://orcid.org/0000-0003-0354-9560"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zijian Zheng","raw_affiliation_strings":["Microsoft, Redmond, WA, USA"],"affiliations":[{"raw_affiliation_string":"Microsoft, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5085862383"],"corresponding_institution_ids":["https://openalex.org/I74973139"],"apc_list":null,"apc_paid":null,"fwci":7.8631,"has_fulltext":false,"cited_by_count":33,"citation_normalized_percentile":{"value":0.97328915,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"501","last_page":"510"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10400","display_name":"Network Security and Intrusion Detection","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8027079701423645},{"id":"https://openalex.org/keywords/captcha","display_name":"CAPTCHA","score":0.7652671337127686},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.7233487367630005},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6819431781768799},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5765829682350159},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5510717034339905},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5434756875038147},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.5364310145378113},{"id":"https://openalex.org/keywords/supervised-learning","display_name":"Supervised learning","score":0.5077613592147827},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.4997985363006592},{"id":"https://openalex.org/keywords/semi-supervised-learning","display_name":"Semi-supervised learning","score":0.486013263463974},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.413729727268219},{"id":"https://openalex.org/keywords/labeled-data","display_name":"Labeled data","score":0.41027766466140747},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.1119939386844635}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8027079701423645},{"id":"https://openalex.org/C163339463","wikidata":"https://www.wikidata.org/wiki/Q484598","display_name":"CAPTCHA","level":2,"score":0.7652671337127686},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.7233487367630005},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6819431781768799},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5765829682350159},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5510717034339905},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5434756875038147},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.5364310145378113},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.5077613592147827},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.4997985363006592},{"id":"https://openalex.org/C58973888","wikidata":"https://www.wikidata.org/wiki/Q1041418","display_name":"Semi-supervised learning","level":2,"score":0.486013263463974},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.413729727268219},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.41027766466140747},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.1119939386844635},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/1772690.1772742","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1772690.1772742","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 19th international conference on World wide web","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.362.6755","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.362.6755","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.hwkang.com/files/WWW2010/wfp0206-kang.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":49,"referenced_works":["https://openalex.org/W123401891","https://openalex.org/W155855048","https://openalex.org/W1511986666","https://openalex.org/W1574877594","https://openalex.org/W1576271900","https://openalex.org/W1583640058","https://openalex.org/W1603565383","https://openalex.org/W1675174563","https://openalex.org/W1756896031","https://openalex.org/W1817561967","https://openalex.org/W1861993554","https://openalex.org/W1922851884","https://openalex.org/W1995101231","https://openalex.org/W1996869586","https://openalex.org/W1997029057","https://openalex.org/W2007089944","https://openalex.org/W2008778571","https://openalex.org/W2031788415","https://openalex.org/W2037284289","https://openalex.org/W2037603696","https://openalex.org/W2048679005","https://openalex.org/W2049633694","https://openalex.org/W2079057609","https://openalex.org/W2093135704","https://openalex.org/W2098136027","https://openalex.org/W2101210369","https://openalex.org/W2107008379","https://openalex.org/W2107968230","https://openalex.org/W2123504579","https://openalex.org/W2125055259","https://openalex.org/W2125592902","https://openalex.org/W2125838338","https://openalex.org/W2127816222","https://openalex.org/W2129245267","https://openalex.org/W2133227149","https://openalex.org/W2135645798","https://openalex.org/W2136504847","https://openalex.org/W2139434830","https://openalex.org/W2148603752","https://openalex.org/W2163166770","https://openalex.org/W2170121748","https://openalex.org/W2296437506","https://openalex.org/W2397866408","https://openalex.org/W2743603832","https://openalex.org/W3098372638","https://openalex.org/W4211064163","https://openalex.org/W4253573210","https://openalex.org/W4256238177","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W2131513867","https://openalex.org/W4306904961","https://openalex.org/W2752124967","https://openalex.org/W2183800414","https://openalex.org/W2184099506","https://openalex.org/W3195782808","https://openalex.org/W1473009882","https://openalex.org/W177233457","https://openalex.org/W3157597523","https://openalex.org/W1481013880"],"abstract_inverted_index":{"In":[0,139],"this":[1,101],"paper,":[2],"we":[3,61,103],"propose":[4,49],"a":[5,50,75,106],"semi-supervised":[6,51,107],"learning":[7,108],"approach":[8,144],"for":[9],"classifying":[10],"program":[11],"(bot)":[12],"generated":[13],"web":[14],"search":[15,35],"traffic":[16],"from":[17,71],"that":[18,30,41,53],"of":[19,34,78,113],"genuine":[20],"human":[21],"users.":[22],"The":[23],"work":[24],"is":[25,90],"motivated":[26],"by":[27],"the":[28,31,55,63,72,93,114,119,136,142,152],"challenge":[29],"enormous":[32],"amount":[33],"data":[36,73,89,94,116],"pose":[37],"to":[38,69,110,117,134,151],"traditional":[39,153],"approaches":[40],"rely":[42],"on":[43],"fully":[44],"annotated":[45],"training":[46,79,88,137],"samples.":[47],"We":[48],"framework":[52],"addresses":[54],"problem":[56],"in":[57],"multiple":[58],"fronts.":[59],"First,":[60],"use":[62],"CAPTCHA":[64],"technique":[65],"and":[66,130],"simple":[67],"heuristics":[68],"extract":[70],"logs":[74],"large":[76],"set":[77],"samples":[80],"with":[81],"initial":[82],"labels,":[83],"though":[84],"directly":[85],"using":[86],"these":[87],"problematic":[91],"because":[92],"thus":[95],"sampled":[96],"are":[97],"biased.":[98],"To":[99],"tackle":[100],"problem,":[102],"further":[104],"develop":[105],"algorithm":[109],"take":[111],"advantage":[112],"unlabeled":[115],"improve":[118],"classification":[120],"performance.":[121],"These":[122],"two":[123],"proposed":[124,143],"algorithms":[125],"can":[126],"be":[127],"seamlessly":[128],"combined":[129],"very":[131],"cost":[132],"efficient":[133],"scale":[135],"process.":[138],"our":[140],"experiment,":[141],"showed":[145],"significant":[146],"(i.e.":[147],"2:1)":[148],"improvement":[149],"compared":[150],"supervised":[154],"approach.":[155]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":3},{"year":2014,"cited_by_count":3},{"year":2013,"cited_by_count":5},{"year":2012,"cited_by_count":2}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
