{"id":"https://openalex.org/W1977836056","doi":"https://doi.org/10.1145/1541822.1541823","title":"IRLbot","display_name":"IRLbot","publication_year":2009,"publication_date":"2009-06-01","ids":{"openalex":"https://openalex.org/W1977836056","doi":"https://doi.org/10.1145/1541822.1541823","mag":"1977836056"},"language":"en","primary_location":{"id":"doi:10.1145/1541822.1541823","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1541822.1541823","pdf_url":null,"source":{"id":"https://openalex.org/S131231701","display_name":"ACM Transactions on the Web","issn_l":"1559-1131","issn":["1559-1131","1559-114X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on the Web","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080290452","display_name":"Hsin-Tsang Lee","orcid":null},"institutions":[{"id":"https://openalex.org/I91045830","display_name":"Texas A&M University","ror":"https://ror.org/01f5ytq51","country_code":"US","type":"education","lineage":["https://openalex.org/I91045830"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Hsin-Tsang Lee","raw_affiliation_strings":["Texas A&amp;M University, College Station, TX","Texas A&M University, College Station. TX#TAB#"],"affiliations":[{"raw_affiliation_string":"Texas A&amp;M University, College Station, TX","institution_ids":["https://openalex.org/I91045830"]},{"raw_affiliation_string":"Texas A&M University, College Station. TX#TAB#","institution_ids":["https://openalex.org/I91045830"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055012567","display_name":"Derek Leonard","orcid":null},"institutions":[{"id":"https://openalex.org/I91045830","display_name":"Texas A&M University","ror":"https://ror.org/01f5ytq51","country_code":"US","type":"education","lineage":["https://openalex.org/I91045830"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Derek Leonard","raw_affiliation_strings":["Texas A&amp;M University, College Station, TX","Texas A&M University, College Station. TX#TAB#"],"affiliations":[{"raw_affiliation_string":"Texas A&amp;M University, College Station, TX","institution_ids":["https://openalex.org/I91045830"]},{"raw_affiliation_string":"Texas A&M University, College Station. TX#TAB#","institution_ids":["https://openalex.org/I91045830"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100377876","display_name":"Xiaoming Wang","orcid":"https://orcid.org/0000-0002-2075-9087"},"institutions":[{"id":"https://openalex.org/I91045830","display_name":"Texas A&M University","ror":"https://ror.org/01f5ytq51","country_code":"US","type":"education","lineage":["https://openalex.org/I91045830"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiaoming Wang","raw_affiliation_strings":["Texas A&amp;M University, College Station, TX","Texas A&M University, College Station. TX#TAB#"],"affiliations":[{"raw_affiliation_string":"Texas A&amp;M University, College Station, TX","institution_ids":["https://openalex.org/I91045830"]},{"raw_affiliation_string":"Texas A&M University, College Station. TX#TAB#","institution_ids":["https://openalex.org/I91045830"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5076734500","display_name":"Dmitri Loguinov","orcid":"https://orcid.org/0000-0003-3876-1000"},"institutions":[{"id":"https://openalex.org/I91045830","display_name":"Texas A&M University","ror":"https://ror.org/01f5ytq51","country_code":"US","type":"education","lineage":["https://openalex.org/I91045830"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dmitri Loguinov","raw_affiliation_strings":["Texas A&amp;M University, College Station, TX","Texas A&M University, College Station. TX#TAB#"],"affiliations":[{"raw_affiliation_string":"Texas A&amp;M University, College Station, TX","institution_ids":["https://openalex.org/I91045830"]},{"raw_affiliation_string":"Texas A&M University, College Station. TX#TAB#","institution_ids":["https://openalex.org/I91045830"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5080290452"],"corresponding_institution_ids":["https://openalex.org/I91045830"],"apc_list":null,"apc_paid":null,"fwci":11.6502,"has_fulltext":false,"cited_by_count":33,"citation_normalized_percentile":{"value":0.98208053,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"3","issue":"3","first_page":"1","last_page":"34"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9904000163078308,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8324594497680664},{"id":"https://openalex.org/keywords/crawling","display_name":"Crawling","score":0.7631824016571045},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.5693964958190918},{"id":"https://openalex.org/keywords/web-crawler","display_name":"Web crawler","score":0.5636014938354492},{"id":"https://openalex.org/keywords/download","display_name":"Download","score":0.5455436706542969},{"id":"https://openalex.org/keywords/scripting-language","display_name":"Scripting language","score":0.518844723701477},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.43934375047683716},{"id":"https://openalex.org/keywords/server","display_name":"Server","score":0.42578360438346863},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4162077307701111},{"id":"https://openalex.org/keywords/web-server","display_name":"Web server","score":0.4107546806335449},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.38869914412498474},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.3269308805465698},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.29737311601638794},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.19737598299980164}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8324594497680664},{"id":"https://openalex.org/C100368936","wikidata":"https://www.wikidata.org/wiki/Q1411725","display_name":"Crawling","level":2,"score":0.7631824016571045},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.5693964958190918},{"id":"https://openalex.org/C13743948","wikidata":"https://www.wikidata.org/wiki/Q45842","display_name":"Web crawler","level":2,"score":0.5636014938354492},{"id":"https://openalex.org/C2780154274","wikidata":"https://www.wikidata.org/wiki/Q7126717","display_name":"Download","level":2,"score":0.5455436706542969},{"id":"https://openalex.org/C61423126","wikidata":"https://www.wikidata.org/wiki/Q187432","display_name":"Scripting language","level":2,"score":0.518844723701477},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.43934375047683716},{"id":"https://openalex.org/C93996380","wikidata":"https://www.wikidata.org/wiki/Q44127","display_name":"Server","level":2,"score":0.42578360438346863},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4162077307701111},{"id":"https://openalex.org/C11392498","wikidata":"https://www.wikidata.org/wiki/Q11288","display_name":"Web server","level":3,"score":0.4107546806335449},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.38869914412498474},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.3269308805465698},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.29737311601638794},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.19737598299980164},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C105702510","wikidata":"https://www.wikidata.org/wiki/Q514","display_name":"Anatomy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1541822.1541823","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1541822.1541823","pdf_url":null,"source":{"id":"https://openalex.org/S131231701","display_name":"ACM Transactions on the Web","issn_l":"1559-1131","issn":["1559-1131","1559-114X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on the Web","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W110443600","https://openalex.org/W173713115","https://openalex.org/W1561637629","https://openalex.org/W1587314265","https://openalex.org/W1602486336","https://openalex.org/W1613836731","https://openalex.org/W1674850363","https://openalex.org/W1761577731","https://openalex.org/W1797803111","https://openalex.org/W1966912174","https://openalex.org/W1968155810","https://openalex.org/W1979126145","https://openalex.org/W1997438973","https://openalex.org/W2000273502","https://openalex.org/W2000333294","https://openalex.org/W2007687650","https://openalex.org/W2012833704","https://openalex.org/W2013531639","https://openalex.org/W2018928332","https://openalex.org/W2029500199","https://openalex.org/W2046441184","https://openalex.org/W2051804774","https://openalex.org/W2066636486","https://openalex.org/W2085922539","https://openalex.org/W2098660810","https://openalex.org/W2108671511","https://openalex.org/W2130242957","https://openalex.org/W2130610812","https://openalex.org/W2139532006","https://openalex.org/W2140279085","https://openalex.org/W2145349611","https://openalex.org/W2145990704","https://openalex.org/W2152565070","https://openalex.org/W2158601853","https://openalex.org/W2161118554","https://openalex.org/W2164542999","https://openalex.org/W2295141584","https://openalex.org/W4245503759","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W2097808084","https://openalex.org/W2566024741","https://openalex.org/W2186697381","https://openalex.org/W2161927007","https://openalex.org/W3216588747","https://openalex.org/W2152505903","https://openalex.org/W2102475112","https://openalex.org/W4251780656","https://openalex.org/W2051135816","https://openalex.org/W2100464657"],"abstract_inverted_index":{"This":[0],"article":[1],"shares":[2],"our":[3,81,117],"experience":[4,132],"in":[5,41,74,123],"designing":[6],"a":[7,17,61,91,151],"Web":[8,155],"crawler":[9],"that":[10,27,84],"can":[11],"download":[12,109],"billions":[13],"of":[14,38,63,111,128,153],"pages":[15,100],"using":[16],"single-server":[18],"implementation":[19,76],"and":[20,51,70,105,135,149],"models":[21],"its":[22],"performance.":[23],"We":[24,58],"first":[25],"show":[26],"current":[28],"crawling":[29],"algorithms":[30,121],"cannot":[31],"effectively":[32],"cope":[33],"with":[34,67,120,157],"the":[35,154],"sheer":[36],"volume":[37],"URLs":[39],"generated":[40],"large":[42],"crawls,":[43],"highly":[44],"branching":[45],"spam,":[46],"legitimate":[47],"multimillion-page":[48],"blog":[49],"sites,":[50],"infinite":[52],"loops":[53],"created":[54],"by":[55],"server-side":[56],"scripts.":[57],"then":[59],"offer":[60],"set":[62],"techniques":[64],"for":[65],"dealing":[66],"these":[68],"issues":[69],"test":[71],"their":[72],"performance":[73],"an":[75,107],"we":[77],"call":[78],"IRLbot.":[79],"In":[80],"recent":[82],"experiment":[83],"lasted":[85],"41":[86,158],"days,":[87],"IRLbot":[88,129],"running":[89],"on":[90],"single":[92],"server":[93],"successfully":[94,136],"crawled":[95],"6.3":[96],"billion":[97,102,147,159],"valid":[98],"HTML":[99],"(7.6":[101],"connection":[103],"requests)":[104],"sustained":[106],"average":[108],"rate":[110],"319":[112],"mb/s":[113],"(1,789":[114],"pages/s).":[115],"Unlike":[116],"prior":[118],"experiments":[119],"proposed":[122],"related":[124],"work,":[125],"this":[126],"version":[127],"did":[130],"not":[131],"any":[133],"bottlenecks":[134],"handled":[137],"content":[138],"from":[139],"over":[140],"117":[141],"million":[142],"hosts,":[143],"parsed":[144],"out":[145],"394":[146],"links,":[148],"discovered":[150],"subset":[152],"graph":[156],"unique":[160],"nodes.":[161]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2018,"cited_by_count":4},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":2},{"year":2014,"cited_by_count":4},{"year":2013,"cited_by_count":3},{"year":2012,"cited_by_count":3}],"updated_date":"2026-03-09T08:58:05.943551","created_date":"2016-06-24T00:00:00"}
