{"id":"https://openalex.org/W2111874851","doi":"https://doi.org/10.1109/icnp.2002.1181397","title":"Clustering Web content for efficient replication","display_name":"Clustering Web content for efficient replication","publication_year":2003,"publication_date":"2003-08-27","ids":{"openalex":"https://openalex.org/W2111874851","doi":"https://doi.org/10.1109/icnp.2002.1181397","mag":"2111874851"},"language":"en","primary_location":{"id":"doi:10.1109/icnp.2002.1181397","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icnp.2002.1181397","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"10th IEEE International Conference on Network Protocols, 2002. Proceedings.","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100703977","display_name":"Yan Chen","orcid":"https://orcid.org/0000-0002-3872-0586"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yan Chen","raw_affiliation_strings":["University of California Berkeley, USA","California Univ., Berkeley, CA USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of California Berkeley, USA","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"California Univ., Berkeley, CA USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032760128","display_name":"Lili Qiu","orcid":"https://orcid.org/0000-0002-1590-9749"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]},{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Lili Qiu","raw_affiliation_strings":["Microsoft Research Limited, UK","Microsoft , USA "],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft Research Limited, UK","institution_ids":["https://openalex.org/I4210164937"]},{"raw_affiliation_string":"Microsoft , USA ","institution_ids":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100373305","display_name":"Wei-Yu Chen","orcid":"https://orcid.org/0000-0003-2958-8437"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Weiyu Chen","raw_affiliation_strings":["University of California Berkeley, USA","University of California at Berkeley"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of California Berkeley, USA","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"University of California at Berkeley","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051151589","display_name":"Luan Viet Nguyen","orcid":"https://orcid.org/0000-0001-5516-2443"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Luan Nguyen","raw_affiliation_strings":["University of California Berkeley, USA","University of California at Berkeley"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of California Berkeley, USA","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"University of California at Berkeley","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5109560331","display_name":"Randy H. Katz","orcid":null},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"R.H. Katz","raw_affiliation_strings":["University of California Berkeley, USA","University of California at Berkeley"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of California Berkeley, USA","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"University of California at Berkeley","institution_ids":["https://openalex.org/I95457486"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.2302,"has_fulltext":false,"cited_by_count":31,"citation_normalized_percentile":{"value":0.93179884,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"165","last_page":"174"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10742","display_name":"Peer-to-Peer Network Technologies","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11896","display_name":"Opportunistic and Delay-Tolerant Networks","score":0.9922000169754028,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.852546751499176},{"id":"https://openalex.org/keywords/replication","display_name":"Replication (statistics)","score":0.7014800310134888},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.6798690557479858},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.5650158524513245},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.4645025432109833},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.4596107006072998},{"id":"https://openalex.org/keywords/content-delivery-network","display_name":"Content delivery network","score":0.4518851041793823},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.4511609673500061},{"id":"https://openalex.org/keywords/replica","display_name":"Replica","score":0.439470112323761},{"id":"https://openalex.org/keywords/server","display_name":"Server","score":0.2956797480583191},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.14427676796913147},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.13192540407180786}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.852546751499176},{"id":"https://openalex.org/C12590798","wikidata":"https://www.wikidata.org/wiki/Q3933199","display_name":"Replication (statistics)","level":2,"score":0.7014800310134888},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6798690557479858},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.5650158524513245},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.4645025432109833},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.4596107006072998},{"id":"https://openalex.org/C2778898579","wikidata":"https://www.wikidata.org/wiki/Q72588","display_name":"Content delivery network","level":3,"score":0.4518851041793823},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4511609673500061},{"id":"https://openalex.org/C2775937380","wikidata":"https://www.wikidata.org/wiki/Q1232589","display_name":"Replica","level":2,"score":0.439470112323761},{"id":"https://openalex.org/C93996380","wikidata":"https://www.wikidata.org/wiki/Q44127","display_name":"Server","level":2,"score":0.2956797480583191},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.14427676796913147},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.13192540407180786},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icnp.2002.1181397","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icnp.2002.1181397","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"10th IEEE International Conference on Network Protocols, 2002. Proceedings.","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.4300000071525574}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1253666328","https://openalex.org/W1575476631","https://openalex.org/W1836071589","https://openalex.org/W1973264045","https://openalex.org/W2042294443","https://openalex.org/W2046878543","https://openalex.org/W2080068757","https://openalex.org/W2095897464","https://openalex.org/W2111874851","https://openalex.org/W2124088278","https://openalex.org/W2132987358","https://openalex.org/W2134529269","https://openalex.org/W2134549628","https://openalex.org/W2145167201","https://openalex.org/W2155659292","https://openalex.org/W2167776925","https://openalex.org/W2174999175","https://openalex.org/W2481666560","https://openalex.org/W2999729612","https://openalex.org/W3020048185","https://openalex.org/W4206214262","https://openalex.org/W4231029117","https://openalex.org/W4298882835","https://openalex.org/W6628286356","https://openalex.org/W6679418958","https://openalex.org/W6685489791"],"related_works":["https://openalex.org/W1520094959","https://openalex.org/W1786922237","https://openalex.org/W3080389706","https://openalex.org/W2754714240","https://openalex.org/W2122589784","https://openalex.org/W2145367253","https://openalex.org/W3115257493","https://openalex.org/W2760372971","https://openalex.org/W4381803269","https://openalex.org/W2015527417"],"abstract_inverted_index":{"Recently,":[0],"there":[1],"has":[2],"been":[3],"an":[4],"increasing":[5],"deployment":[6],"of":[7,25,50,76,90,108,159,206],"content":[8,18,61,73,105,188],"distribution":[9],"networks":[10],"(CDNs)":[11],"that":[12,71,121,180],"offer":[13],"hosting":[14],"services":[15],"to":[16,43,59,62,86,103,116,140,185,203,229],"Web":[17,26,91,137,151],"providers.":[19],"We":[20,101,125,175,190],"first":[21],"compare":[22],"uncooperative":[23],"pulling":[24],"contents,":[27],"used":[28],"by":[29,119,225],"commercial":[30],"CDNs,":[31],"with":[32,47],"cooperative":[33],"pushing.":[34],"The":[35,198,214],"latter":[36],"can":[37,78,162],"achieve":[38],"user":[39],"perceived":[40],"performance":[41,205],"comparable":[42],"the":[44,157,166,186,204,207,222],"former":[45],"scheme":[46],"only":[48],"4-5%":[49],"replication":[51,97,145,219,231],"and":[52,130,134,168,194,218,232],"update":[53],"traffic.":[54],"Therefore,":[55],"we":[56,69,161],"explore":[57,177],"how":[58],"push":[60],"CDN":[63],"nodes":[64],"efficiently.":[65],"Using":[66],"trace-driven":[67],"simulation,":[68],"show":[70],"replicating":[72,87],"in":[74,82,88,106],"units":[75,89,107],"URLs":[77],"yield":[79,201],"60-70%":[80],"reduction":[81],"clients'":[83],"latency,":[84],"compared":[85,228],"sites.":[92],"However,":[93],"such":[94],"a":[95],"fine-grained":[96],"is":[98,237],"very":[99],"expensive.":[100],"propose":[102],"replicate":[104],"clusters,":[109,160],"each":[110],"containing":[111],"objects":[112],"which":[113],"are":[114,122],"likely":[115],"be":[117],"requested":[118],"clients":[120],"topologically":[123],"close.":[124],"describe":[126],"three":[127],"clustering":[128,217],"techniques,":[129],"use":[131],"various":[132],"topologies":[133],"several":[135],"large":[136],"server":[138],"traces":[139],"evaluate":[141],"their":[142],"performance.":[143,174],"Cluster-based":[144],"achieves":[146],"40-60%":[147],"improvement":[148],"over":[149],"per":[150],"site":[152],"based":[153],"replication.":[154],"By":[155],"adjusting":[156],"number":[158],"smoothly":[163],"trade":[164],"off":[165],"management":[167],"computation":[169],"cost":[170,224],"for":[171,240],"better":[172],"client":[173],"also":[176],"incremental":[178,196,216],"clusterings":[179,200],"adaptively":[181],"add":[182],"new":[183],"documents":[184],"existing":[187],"clusters.":[189],"examine":[191],"both":[192],"offline":[193,199],"online":[195,215],"clusterings.":[197],"close":[202],"complete":[208],"re-clustering":[209],"at":[210],"much":[211],"lower":[212],"overhead.":[213],"cut":[220],"down":[221],"retrieval":[223],"4.6-8":[226],"times":[227],"no":[230],"random":[233],"replication,":[234],"so":[235],"it":[236],"especially":[238],"useful":[239],"improving":[241],"document":[242],"availability":[243],"during":[244],"flash":[245],"crowds.":[246]},"counts_by_year":[{"year":2015,"cited_by_count":2},{"year":2014,"cited_by_count":3},{"year":2013,"cited_by_count":1},{"year":2012,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
