{"id":"https://openalex.org/W2052747824","doi":"https://doi.org/10.1145/1145581.1145634","title":"Catching web crawlers in the act","display_name":"Catching web crawlers in the act","publication_year":2006,"publication_date":"2006-01-01","ids":{"openalex":"https://openalex.org/W2052747824","doi":"https://doi.org/10.1145/1145581.1145634","mag":"2052747824"},"language":"en","primary_location":{"id":"doi:10.1145/1145581.1145634","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1145581.1145634","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 6th international conference on Web engineering  - ICWE '06","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111760061","display_name":"An\u00e1lia G. Louren\u00e7o","orcid":null},"institutions":[{"id":"https://openalex.org/I99682543","display_name":"University of Minho","ror":"https://ror.org/037wpkx04","country_code":"PT","type":"education","lineage":["https://openalex.org/I99682543"]}],"countries":["PT"],"is_corresponding":true,"raw_author_name":"An\u00e1lia G. Louren\u00e7o","raw_affiliation_strings":["Universidade do Minho, Braga, Portugal","[Universidade do Minho, Braga, Portugal]"],"affiliations":[{"raw_affiliation_string":"Universidade do Minho, Braga, Portugal","institution_ids":["https://openalex.org/I99682543"]},{"raw_affiliation_string":"[Universidade do Minho, Braga, Portugal]","institution_ids":["https://openalex.org/I99682543"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5061961536","display_name":"Orlando Belo","orcid":"https://orcid.org/0000-0003-2157-8891"},"institutions":[{"id":"https://openalex.org/I99682543","display_name":"University of Minho","ror":"https://ror.org/037wpkx04","country_code":"PT","type":"education","lineage":["https://openalex.org/I99682543"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Orlando O. Belo","raw_affiliation_strings":["Universidade do Minho, Braga, Portugal","[Universidade do Minho, Braga, Portugal]"],"affiliations":[{"raw_affiliation_string":"Universidade do Minho, Braga, Portugal","institution_ids":["https://openalex.org/I99682543"]},{"raw_affiliation_string":"[Universidade do Minho, Braga, Portugal]","institution_ids":["https://openalex.org/I99682543"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5111760061"],"corresponding_institution_ids":["https://openalex.org/I99682543"],"apc_list":null,"apc_paid":null,"fwci":3.4476,"has_fulltext":false,"cited_by_count":34,"citation_normalized_percentile":{"value":0.92948392,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"265","last_page":"265"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.9891999959945679,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/web-crawler","display_name":"Web crawler","score":0.9524562358856201},{"id":"https://openalex.org/keywords/clickstream","display_name":"Clickstream","score":0.808729887008667},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6896179914474487},{"id":"https://openalex.org/keywords/crawling","display_name":"Crawling","score":0.5843423008918762},{"id":"https://openalex.org/keywords/profiling","display_name":"Profiling (computer programming)","score":0.5592312812805176},{"id":"https://openalex.org/keywords/web-server","display_name":"Web server","score":0.5485395789146423},{"id":"https://openalex.org/keywords/focused-crawler","display_name":"Focused crawler","score":0.54535311460495},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.5367786288261414},{"id":"https://openalex.org/keywords/web-api","display_name":"Web API","score":0.3316894471645355},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.3280482888221741},{"id":"https://openalex.org/keywords/static-web-page","display_name":"Static web page","score":0.21416965126991272},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.16342586278915405},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.127894788980484}],"concepts":[{"id":"https://openalex.org/C13743948","wikidata":"https://www.wikidata.org/wiki/Q45842","display_name":"Web crawler","level":2,"score":0.9524562358856201},{"id":"https://openalex.org/C138744977","wikidata":"https://www.wikidata.org/wiki/Q5132438","display_name":"Clickstream","level":5,"score":0.808729887008667},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6896179914474487},{"id":"https://openalex.org/C100368936","wikidata":"https://www.wikidata.org/wiki/Q1411725","display_name":"Crawling","level":2,"score":0.5843423008918762},{"id":"https://openalex.org/C187191949","wikidata":"https://www.wikidata.org/wiki/Q1138496","display_name":"Profiling (computer programming)","level":2,"score":0.5592312812805176},{"id":"https://openalex.org/C11392498","wikidata":"https://www.wikidata.org/wiki/Q11288","display_name":"Web server","level":3,"score":0.5485395789146423},{"id":"https://openalex.org/C73340581","wikidata":"https://www.wikidata.org/wiki/Q5463958","display_name":"Focused crawler","level":5,"score":0.54535311460495},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.5367786288261414},{"id":"https://openalex.org/C127613066","wikidata":"https://www.wikidata.org/wiki/Q557770","display_name":"Web API","level":4,"score":0.3316894471645355},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3280482888221741},{"id":"https://openalex.org/C173576120","wikidata":"https://www.wikidata.org/wiki/Q2641220","display_name":"Static web page","level":4,"score":0.21416965126991272},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.16342586278915405},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.127894788980484},{"id":"https://openalex.org/C105702510","wikidata":"https://www.wikidata.org/wiki/Q514","display_name":"Anatomy","level":1,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/1145581.1145634","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1145581.1145634","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 6th international conference on Web engineering  - ICWE '06","raw_type":"proceedings-article"},{"id":"pmh:oai:repositorium.sdum.uminho.pt:1822/72111","is_oa":false,"landing_page_url":"http://hdl.handle.net/1822/72111","pdf_url":null,"source":{"id":"https://openalex.org/S4306400354","display_name":"Reposit\u00f3riUM (Universidade do Minho)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I99682543","host_organization_name":"University of Minho","host_organization_lineage":["https://openalex.org/I99682543"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"conference paper"},{"id":"pmh:oai:repositorium.uminho.pt:1822/72111","is_oa":false,"landing_page_url":"https://hdl.handle.net/1822/72111","pdf_url":null,"source":{"id":"https://openalex.org/S4306402433","display_name":"Portuguese National Funding Agency for Science, Research and Technology (RCAAP Project by FCT)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"conference paper"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320334779","display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia","ror":"https://ror.org/00snfqn58"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W131898573","https://openalex.org/W1548349843","https://openalex.org/W1583640058","https://openalex.org/W1593043209","https://openalex.org/W1854214752","https://openalex.org/W2037284289","https://openalex.org/W2038311291","https://openalex.org/W2085305295","https://openalex.org/W2125055259","https://openalex.org/W2140796896","https://openalex.org/W2162761309"],"related_works":["https://openalex.org/W2375180657","https://openalex.org/W4385695127","https://openalex.org/W4248730791","https://openalex.org/W2026132847","https://openalex.org/W1506122440","https://openalex.org/W2137810919","https://openalex.org/W2358310581","https://openalex.org/W2352686120","https://openalex.org/W2019080882","https://openalex.org/W2372594123"],"abstract_inverted_index":{"This":[0],"paper":[1],"recommends":[2],"a":[3,58,75,87,112,119],"new":[4],"approach":[5],"to":[6,85,91,127],"the":[7,129,132],"detection":[8,21,61],"and":[9,30,50,74,98,136],"containment":[10],"of":[11,26,102,131],"Web":[12,27,65,71,114,143],"crawler":[13,23,51,66,104],"traverses":[14,67],"based":[15,68],"on":[16,69],"clickstream":[17],"data":[18,39],"mining.":[19],"Timely":[20],"prevents":[22],"abusive":[24],"consumption":[25],"server":[28],"resources":[29],"eventual":[31,80],"site":[32,115],"contents":[33],"privacy":[34],"or":[35],"copyrights":[36],"violation.":[37],"Clickstream":[38],"differentiation":[40],"ensures":[41],"focused":[42],"usage":[43],"analysis,":[44],"valuable":[45],"both":[46],"for":[47,121,139],"regular":[48],"users":[49],"profiling.":[52],"Our":[53],"platform,":[54],"named":[55],"ClickTips,":[56],"sustains":[57],"site-specific,":[59],"updatable":[60],"model":[62,77,88],"that":[63,78,99],"tags":[64],"incremental":[70],"session":[72],"inspection":[73],"decision":[76],"assesses":[79],"containment.":[81],"The":[82],"goal":[83],"is":[84,100],"deliver":[86],"flexible":[89],"enough":[90],"keep":[92],"up":[93],"with":[94],"crawling":[95],"continuous":[96],"evolving":[97],"capable":[101],"detecting":[103],"presence":[105],"as":[106,108,118,124],"soon":[107],"possible.":[109],"We":[110],"use":[111],"real-world":[113],"case":[116],"study":[117],"support":[120],"process":[122],"description,":[123],"well":[125],"as,":[126],"evaluate":[128],"accuracy":[130],"obtained":[133],"classification":[134],"models":[135],"their":[137],"ability":[138],"discovering":[140],"previously":[141],"unknown":[142],"crawlers.":[144]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":4},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":4},{"year":2013,"cited_by_count":5},{"year":2012,"cited_by_count":4}],"updated_date":"2026-04-13T07:58:08.660418","created_date":"2025-10-10T00:00:00"}
