{"id":"https://openalex.org/W1982144664","doi":"https://doi.org/10.1145/2089016.2089030","title":"Measuring redundancy level on the web","display_name":"Measuring redundancy level on the web","publication_year":2011,"publication_date":"2011-11-09","ids":{"openalex":"https://openalex.org/W1982144664","doi":"https://doi.org/10.1145/2089016.2089030","mag":"1982144664"},"language":"en","primary_location":{"id":"doi:10.1145/2089016.2089030","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2089016.2089030","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 7th Asian Internet Engineering Conference","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063486829","display_name":"Alexander Afanasyev","orcid":"https://orcid.org/0000-0003-0420-1267"},"institutions":[{"id":"https://openalex.org/I2799798094","display_name":"UCLA Health","ror":"https://ror.org/01d88se56","country_code":"US","type":"funder","lineage":["https://openalex.org/I2799798094"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Alexander Afanasyev","raw_affiliation_strings":["UCLA","UCLA ,"],"affiliations":[{"raw_affiliation_string":"UCLA","institution_ids":["https://openalex.org/I2799798094"]},{"raw_affiliation_string":"UCLA ,","institution_ids":["https://openalex.org/I2799798094"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028740131","display_name":"Chunyi Peng","orcid":"https://orcid.org/0000-0002-2361-2224"},"institutions":[{"id":"https://openalex.org/I2799798094","display_name":"UCLA Health","ror":"https://ror.org/01d88se56","country_code":"US","type":"funder","lineage":["https://openalex.org/I2799798094"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chunyi Peng","raw_affiliation_strings":["UCLA","UCLA ,"],"affiliations":[{"raw_affiliation_string":"UCLA","institution_ids":["https://openalex.org/I2799798094"]},{"raw_affiliation_string":"UCLA ,","institution_ids":["https://openalex.org/I2799798094"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084023151","display_name":"Jiangzhe Wang","orcid":"https://orcid.org/0000-0001-8100-4529"},"institutions":[{"id":"https://openalex.org/I2799798094","display_name":"UCLA Health","ror":"https://ror.org/01d88se56","country_code":"US","type":"funder","lineage":["https://openalex.org/I2799798094"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jiangzhe Wang","raw_affiliation_strings":["UCLA","UCLA ,"],"affiliations":[{"raw_affiliation_string":"UCLA","institution_ids":["https://openalex.org/I2799798094"]},{"raw_affiliation_string":"UCLA ,","institution_ids":["https://openalex.org/I2799798094"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5116294214","display_name":"Lixia Zhang","orcid":"https://orcid.org/0000-0003-0701-757X"},"institutions":[{"id":"https://openalex.org/I2799798094","display_name":"UCLA Health","ror":"https://ror.org/01d88se56","country_code":"US","type":"funder","lineage":["https://openalex.org/I2799798094"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lixia Zhang","raw_affiliation_strings":["UCLA","UCLA ,"],"affiliations":[{"raw_affiliation_string":"UCLA","institution_ids":["https://openalex.org/I2799798094"]},{"raw_affiliation_string":"UCLA ,","institution_ids":["https://openalex.org/I2799798094"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5063486829"],"corresponding_institution_ids":["https://openalex.org/I2799798094"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.13276309,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"81","last_page":"88"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10064","display_name":"Complex Network Analysis Techniques","score":0.9908000230789185,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7594928741455078},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.7412557005882263},{"id":"https://openalex.org/keywords/redundancy","display_name":"Redundancy (engineering)","score":0.730249285697937},{"id":"https://openalex.org/keywords/search-engine","display_name":"Search engine","score":0.6861240267753601},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6729668974876404},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.6104675531387329},{"id":"https://openalex.org/keywords/phrase","display_name":"Phrase","score":0.5168905258178711},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.51319819688797},{"id":"https://openalex.org/keywords/web-content","display_name":"Web content","score":0.5120659470558167},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4938131272792816},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.16874536871910095}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7594928741455078},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.7412557005882263},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.730249285697937},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.6861240267753601},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6729668974876404},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.6104675531387329},{"id":"https://openalex.org/C2776224158","wikidata":"https://www.wikidata.org/wiki/Q187931","display_name":"Phrase","level":2,"score":0.5168905258178711},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.51319819688797},{"id":"https://openalex.org/C2776324614","wikidata":"https://www.wikidata.org/wiki/Q3948731","display_name":"Web content","level":3,"score":0.5120659470558167},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4938131272792816},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.16874536871910095},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/2089016.2089030","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2089016.2089030","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 7th Asian Internet Engineering Conference","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.710.4036","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.710.4036","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://lasr.cs.ucla.edu/afanasyev/data/files/Afanasyev/aintec-search.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W155403277","https://openalex.org/W192724328","https://openalex.org/W1596691921","https://openalex.org/W1729562701","https://openalex.org/W2007842132","https://openalex.org/W2012833704","https://openalex.org/W2019264297","https://openalex.org/W2085922539","https://openalex.org/W2152565070"],"related_works":["https://openalex.org/W2039546652","https://openalex.org/W2012262991","https://openalex.org/W4255475599","https://openalex.org/W2544674189","https://openalex.org/W2897171874","https://openalex.org/W1987716395","https://openalex.org/W2550808318","https://openalex.org/W4346570","https://openalex.org/W2278505189","https://openalex.org/W2367003870"],"abstract_inverted_index":{"This":[0,146],"paper":[1],"tries":[2],"to":[3,64,106,120],"estimate":[4],"redundancy":[5,98],"level":[6,128],"on":[7],"the":[8,35,44,55,58,71,76,80,100,111,144],"Web":[9],"by":[10,114],"employing":[11],"information":[12],"collected":[13,29],"from":[14,54,85],"existent":[15],"search":[16,66,115],"engines.":[17],"To":[18],"make":[19],"measurements":[20],"feasible,":[21],"a":[22,31,49,94,125],"representative":[23],"set":[24,45],"of":[25,34,57,73,93,99,129,143,152,171],"Internet":[26,36],"sites":[27],"was":[28,46],"using":[30,48],"random":[32,50],"sampling":[33],"catalogs":[37],"DMOZ":[38],"and":[39,69,109,162],"Delicious.":[40],"Each":[41],"page":[42],"in":[43],"identified":[47],"32-word":[51],"phrase":[52],"extracted":[53],"content":[56,134,154,160],"page.":[59],"These":[60],"phrases":[61,102],"were":[62],"used":[63],"perform":[65],"engine":[67],"queries":[68],"infer":[70],"number":[72],"pages":[74,112],"with":[75,136],"same":[77],"content.":[78],"Though":[79],"presented":[81],"method":[82],"is":[83],"far":[84],"being":[86],"perfectly":[87],"accurate,":[88],"it":[89],"provides":[90],"an":[91],"approximation":[92],"lower-bound":[95],"for":[96,141],"visible":[97,119],"web---long":[101],"will":[103],"likely":[104],"belong":[105],"duplicate":[107],"pages,":[108],"only":[110],"indexed":[113],"engines":[116],"are":[117,164],"really":[118],"users.":[121],"Obtained":[122],"results":[123],"showed":[124],"surprisingly":[126],"low":[127],"duplication":[130,161],"averaged":[131],"over":[132],"all":[133,169],"types,":[135],"less":[137],"then":[138],"ten":[139],"duplicates":[140],"most":[142],"pages.":[145],"indicates":[147],"that":[148],"besides":[149],"well-known":[150],"classes":[151],"high-redundant":[153],"(news,":[155],"mailing":[156],"list":[157],"archives,":[158],"etc.),":[159],"plagiarism":[163],"not":[165],"globally":[166],"widespread":[167],"across":[168],"types":[170],"webpages.":[172]},"counts_by_year":[],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
