{"id":"https://openalex.org/W2157551792","doi":"https://doi.org/10.1145/1183463.1183468","title":"Automated gathering of Web information","display_name":"Automated gathering of Web information","publication_year":2006,"publication_date":"2006-11-01","ids":{"openalex":"https://openalex.org/W2157551792","doi":"https://doi.org/10.1145/1183463.1183468","mag":"2157551792"},"language":"en","primary_location":{"id":"doi:10.1145/1183463.1183468","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1183463.1183468","pdf_url":null,"source":{"id":"https://openalex.org/S97833917","display_name":"ACM Transactions on Internet Technology","issn_l":"1533-5399","issn":["1533-5399","1557-6051"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Internet Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5044849571","display_name":"Bernard J. Jansen","orcid":"https://orcid.org/0000-0002-6468-6609"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Bernard J. Jansen","raw_affiliation_strings":["The Pennsylvania State University"],"affiliations":[{"raw_affiliation_string":"The Pennsylvania State University","institution_ids":["https://openalex.org/I130769515"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112631132","display_name":"Tracy Mullen","orcid":null},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tracy Mullen","raw_affiliation_strings":["The Pennsylvania State University"],"affiliations":[{"raw_affiliation_string":"The Pennsylvania State University","institution_ids":["https://openalex.org/I130769515"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108224935","display_name":"Amanda Spink","orcid":null},"institutions":[{"id":"https://openalex.org/I170201317","display_name":"University of Pittsburgh","ror":"https://ror.org/01an3r305","country_code":"US","type":"education","lineage":["https://openalex.org/I170201317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Amanda Spink","raw_affiliation_strings":["The University of Pittsburgh"],"affiliations":[{"raw_affiliation_string":"The University of Pittsburgh","institution_ids":["https://openalex.org/I170201317"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074139750","display_name":"Jan Pedersen","orcid":"https://orcid.org/0000-0002-3099-2106"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jan Pedersen","raw_affiliation_strings":["Overture Services, Inc"],"affiliations":[{"raw_affiliation_string":"Overture Services, Inc","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5044849571"],"corresponding_institution_ids":["https://openalex.org/I130769515"],"apc_list":null,"apc_paid":null,"fwci":11.1884,"has_fulltext":false,"cited_by_count":17,"citation_normalized_percentile":{"value":0.97909356,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":"6","issue":"4","first_page":"442","last_page":"464"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.9907000064849854,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8654356598854065},{"id":"https://openalex.org/keywords/search-engine","display_name":"Search engine","score":0.6645158529281616},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6084398031234741},{"id":"https://openalex.org/keywords/session","display_name":"Session (web analytics)","score":0.6054387092590332},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.5701301097869873},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.4957951009273529},{"id":"https://openalex.org/keywords/web-search-query","display_name":"Web search query","score":0.4908490777015686},{"id":"https://openalex.org/keywords/semantic-search","display_name":"Semantic search","score":0.4347694516181946},{"id":"https://openalex.org/keywords/web-search-engine","display_name":"Web search engine","score":0.43189889192581177},{"id":"https://openalex.org/keywords/intelligent-agent","display_name":"Intelligent agent","score":0.41262006759643555},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.10546600818634033}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8654356598854065},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.6645158529281616},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6084398031234741},{"id":"https://openalex.org/C2779182362","wikidata":"https://www.wikidata.org/wiki/Q17126187","display_name":"Session (web analytics)","level":2,"score":0.6054387092590332},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.5701301097869873},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.4957951009273529},{"id":"https://openalex.org/C164120249","wikidata":"https://www.wikidata.org/wiki/Q995982","display_name":"Web search query","level":3,"score":0.4908490777015686},{"id":"https://openalex.org/C166423231","wikidata":"https://www.wikidata.org/wiki/Q1891170","display_name":"Semantic search","level":3,"score":0.4347694516181946},{"id":"https://openalex.org/C521815418","wikidata":"https://www.wikidata.org/wiki/Q4182287","display_name":"Web search engine","level":4,"score":0.43189889192581177},{"id":"https://openalex.org/C74072328","wikidata":"https://www.wikidata.org/wiki/Q1142726","display_name":"Intelligent agent","level":2,"score":0.41262006759643555},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.10546600818634033}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1183463.1183468","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1183463.1183468","pdf_url":null,"source":{"id":"https://openalex.org/S97833917","display_name":"ACM Transactions on Internet Technology","issn_l":"1533-5399","issn":["1533-5399","1557-6051"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Internet Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":83,"referenced_works":["https://openalex.org/W21704922","https://openalex.org/W104159351","https://openalex.org/W126924136","https://openalex.org/W139155661","https://openalex.org/W157562232","https://openalex.org/W1489992655","https://openalex.org/W1499407697","https://openalex.org/W1508511232","https://openalex.org/W1532852017","https://openalex.org/W1533748861","https://openalex.org/W1544423579","https://openalex.org/W1550771877","https://openalex.org/W1557857707","https://openalex.org/W1588320668","https://openalex.org/W1602709182","https://openalex.org/W1605488413","https://openalex.org/W1679844312","https://openalex.org/W1911857613","https://openalex.org/W1965910880","https://openalex.org/W1968155810","https://openalex.org/W1970990216","https://openalex.org/W1976015163","https://openalex.org/W1977089507","https://openalex.org/W1982889956","https://openalex.org/W1983305247","https://openalex.org/W1989771757","https://openalex.org/W1991755171","https://openalex.org/W1995101231","https://openalex.org/W1995554305","https://openalex.org/W1998196377","https://openalex.org/W2000145992","https://openalex.org/W2003980395","https://openalex.org/W2008959396","https://openalex.org/W2012516036","https://openalex.org/W2013538567","https://openalex.org/W2018928332","https://openalex.org/W2025016813","https://openalex.org/W2026080185","https://openalex.org/W2029500199","https://openalex.org/W2030453570","https://openalex.org/W2032117162","https://openalex.org/W2033289350","https://openalex.org/W2061121117","https://openalex.org/W2067018993","https://openalex.org/W2073853190","https://openalex.org/W2075210032","https://openalex.org/W2088287196","https://openalex.org/W2094187861","https://openalex.org/W2097166075","https://openalex.org/W2104772551","https://openalex.org/W2104876549","https://openalex.org/W2105533042","https://openalex.org/W2110766068","https://openalex.org/W2111523247","https://openalex.org/W2123329836","https://openalex.org/W2124449410","https://openalex.org/W2124673015","https://openalex.org/W2128302092","https://openalex.org/W2129347865","https://openalex.org/W2130316040","https://openalex.org/W2130416970","https://openalex.org/W2132461362","https://openalex.org/W2135499008","https://openalex.org/W2143237916","https://openalex.org/W2144169452","https://openalex.org/W2149397822","https://openalex.org/W2154498027","https://openalex.org/W2154724067","https://openalex.org/W2158030545","https://openalex.org/W2160368903","https://openalex.org/W2162502927","https://openalex.org/W2162985330","https://openalex.org/W2164542999","https://openalex.org/W2165376033","https://openalex.org/W2167075392","https://openalex.org/W2170714116","https://openalex.org/W2296324464","https://openalex.org/W2336993214","https://openalex.org/W2608239929","https://openalex.org/W4232534713","https://openalex.org/W4236543541","https://openalex.org/W4302039983","https://openalex.org/W6602279961"],"related_works":["https://openalex.org/W2359166167","https://openalex.org/W3590553","https://openalex.org/W3110844189","https://openalex.org/W4297963434","https://openalex.org/W2336826532","https://openalex.org/W2056608361","https://openalex.org/W2321599862","https://openalex.org/W2066869521","https://openalex.org/W2111960202","https://openalex.org/W2091420710"],"abstract_inverted_index":{"The":[0],"Web":[1,25,55,102,182,220],"has":[2],"become":[3],"a":[4,42,67,187],"worldwide":[5],"repository":[6],"of":[7,23,46,74,87,89,113,124,163,178,191,196,206],"information":[8,20,33,70,75,100,221],"which":[9],"individuals,":[10],"companies,":[11],"and":[12,79,111,126,135,202,223],"organizations":[13],"utilize":[14,27],"to":[15,30,141,172],"solve":[16],"or":[17],"address":[18],"various":[19],"problems.":[21],"Many":[22],"these":[24],"users":[26],"automated":[28],"agents":[29,56,97,130,183,222],"gather":[31],"this":[32,39,62,116],"for":[34,58,69,99,186,219],"them.":[35],"Some":[36],"assume":[37],"that":[38],"approach":[40],"represents":[41],"more":[43],"sophisticated":[44],"method":[45],"searching.":[47],"However,":[48],"there":[49],"is":[50],"little":[51,176],"research":[52],"investigating":[53,95],"how":[54,96],"search":[57,98,103,137,208,224],"online":[59],"information.":[60],"In":[61],"research,":[63],"we":[64,119],"first":[65],"provide":[66],"classification":[68],"agent":[71,80,155,168],"using":[72],"stages":[73],"gathering,":[76],"gathering":[77],"approaches,":[78],"architecture.":[81],"We":[82,215],"then":[83],"examine":[84],"an":[85],"implementation":[86],"one":[88],"the":[90,106,133,197,204,217],"resulting":[91],"classifications":[92],"in":[93],"detail,":[94],"on":[101],"engines,":[104],"including":[105],"session,":[107],"query,":[108],"term,":[109],"duration":[110,205],"frequency":[112],"interactions.":[114],"For":[115],"temporal":[117],"study,":[118],"analyzed":[120],"three":[121],"data":[122],"sets":[123],"queries":[125,146,169],"page":[127],"views":[128],"from":[129,139],"interacting":[131],"with":[132,160,175],"Excite":[134],"AltaVista":[136],"engines":[138],"1997":[140],"2002,":[142],"examining":[143],"approximately":[144],"900,000":[145],"submitted":[147],"by":[148],"over":[149],"3,000":[150],"agents.":[151],"Findings":[152],"include:":[153],"(1)":[154],"sessions":[156],"are":[157,170,184,200],"extremely":[158],"interactive,":[159],"sometimes":[161],"hundreds":[162],"interactions":[164],"per":[165],"second":[166],"(2)":[167],"comparable":[171],"human":[173],"searchers,":[174],"use":[177],"query":[179],"operators,":[180],"(3)":[181],"searching":[185],"relatively":[188],"limited":[189],"variety":[190],"information,":[192],"wherein":[193],"only":[194],"18%":[195],"terms":[198],"used":[199],"unique,":[201],"(4)":[203],"agent-Web":[207],"engine":[209],"interaction":[210],"typically":[211],"spans":[212],"several":[213],"hours.":[214],"discuss":[216],"implications":[218],"engines.":[225]},"counts_by_year":[{"year":2012,"cited_by_count":2}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
