{"id":"https://openalex.org/W2128941908","doi":"https://doi.org/10.1145/1135777.1135833","title":"Random sampling from a search engine's index","display_name":"Random sampling from a search engine's index","publication_year":2006,"publication_date":"2006-05-23","ids":{"openalex":"https://openalex.org/W2128941908","doi":"https://doi.org/10.1145/1135777.1135833","mag":"2128941908"},"language":"en","primary_location":{"id":"doi:10.1145/1135777.1135833","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1135777.1135833","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 15th international conference on World Wide Web","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5054834255","display_name":"Ziv Bar-Yossef","orcid":null},"institutions":[{"id":"https://openalex.org/I174306211","display_name":"Technion \u2013 Israel Institute of Technology","ror":"https://ror.org/03qryx823","country_code":"IL","type":"education","lineage":["https://openalex.org/I174306211"]}],"countries":["IL"],"is_corresponding":true,"raw_author_name":"Ziv Bar-Yossef","raw_affiliation_strings":["Technion, Haifa, Israel","Technion , Haifa , Israel"],"affiliations":[{"raw_affiliation_string":"Technion, Haifa, Israel","institution_ids":["https://openalex.org/I174306211"]},{"raw_affiliation_string":"Technion , Haifa , Israel","institution_ids":["https://openalex.org/I174306211"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5086993894","display_name":"Maxim Gurevich","orcid":"https://orcid.org/0000-0003-4693-0556"},"institutions":[{"id":"https://openalex.org/I174306211","display_name":"Technion \u2013 Israel Institute of Technology","ror":"https://ror.org/03qryx823","country_code":"IL","type":"education","lineage":["https://openalex.org/I174306211"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Maxim Gurevich","raw_affiliation_strings":["Technion, Haifa, Israel","Technion , Haifa , Israel"],"affiliations":[{"raw_affiliation_string":"Technion, Haifa, Israel","institution_ids":["https://openalex.org/I174306211"]},{"raw_affiliation_string":"Technion , Haifa , Israel","institution_ids":["https://openalex.org/I174306211"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5054834255"],"corresponding_institution_ids":["https://openalex.org/I174306211"],"apc_list":null,"apc_paid":null,"fwci":26.7206,"has_fulltext":false,"cited_by_count":119,"citation_normalized_percentile":{"value":0.99668897,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"367","last_page":"376"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11152","display_name":"Stochastic processes and statistical mechanics","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/2610","display_name":"Mathematical Physics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.7062222957611084},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.697693407535553},{"id":"https://openalex.org/keywords/index","display_name":"Index (typography)","score":0.6703743934631348},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.6591992974281311},{"id":"https://openalex.org/keywords/search-engine","display_name":"Search engine","score":0.6112151741981506},{"id":"https://openalex.org/keywords/monte-carlo-method","display_name":"Monte Carlo method","score":0.47950631380081177},{"id":"https://openalex.org/keywords/sample-size-determination","display_name":"Sample size determination","score":0.45175424218177795},{"id":"https://openalex.org/keywords/interface","display_name":"Interface (matter)","score":0.44220951199531555},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.44080352783203125},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.40412983298301697},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.32246220111846924},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1923806071281433},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.1374180018901825}],"concepts":[{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.7062222957611084},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.697693407535553},{"id":"https://openalex.org/C2777382242","wikidata":"https://www.wikidata.org/wiki/Q6017816","display_name":"Index (typography)","level":2,"score":0.6703743934631348},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.6591992974281311},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.6112151741981506},{"id":"https://openalex.org/C19499675","wikidata":"https://www.wikidata.org/wiki/Q232207","display_name":"Monte Carlo method","level":2,"score":0.47950631380081177},{"id":"https://openalex.org/C129848803","wikidata":"https://www.wikidata.org/wiki/Q2564360","display_name":"Sample size determination","level":2,"score":0.45175424218177795},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.44220951199531555},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.44080352783203125},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.40412983298301697},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.32246220111846924},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1923806071281433},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.1374180018901825},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C157915830","wikidata":"https://www.wikidata.org/wiki/Q2928001","display_name":"Bubble","level":2,"score":0.0},{"id":"https://openalex.org/C129307140","wikidata":"https://www.wikidata.org/wiki/Q6795880","display_name":"Maximum bubble pressure method","level":3,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1135777.1135833","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1135777.1135833","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 15th international conference on World Wide Web","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W185324491","https://openalex.org/W1510634602","https://openalex.org/W1568495775","https://openalex.org/W1598759141","https://openalex.org/W1605217017","https://openalex.org/W1659541576","https://openalex.org/W1964038241","https://openalex.org/W1983416950","https://openalex.org/W2001351653","https://openalex.org/W2019473674","https://openalex.org/W2028716813","https://openalex.org/W2033057584","https://openalex.org/W2033747448","https://openalex.org/W2056760934","https://openalex.org/W2057767944","https://openalex.org/W2069739265","https://openalex.org/W2080676333","https://openalex.org/W2089657469","https://openalex.org/W2091082553","https://openalex.org/W2117850397","https://openalex.org/W2122141283","https://openalex.org/W2125125501","https://openalex.org/W2134711723","https://openalex.org/W2136059419","https://openalex.org/W2138309709","https://openalex.org/W2139964991","https://openalex.org/W2144959234","https://openalex.org/W2147164982","https://openalex.org/W2154707336","https://openalex.org/W2155711776","https://openalex.org/W2247055361","https://openalex.org/W2798909945","https://openalex.org/W3137614770","https://openalex.org/W6668080254","https://openalex.org/W6681574511"],"related_works":["https://openalex.org/W4293088233","https://openalex.org/W3152660226","https://openalex.org/W2393870460","https://openalex.org/W2496077116","https://openalex.org/W2254578859","https://openalex.org/W4284894156","https://openalex.org/W2161803855","https://openalex.org/W2900695351","https://openalex.org/W1986523067","https://openalex.org/W3082212156"],"abstract_inverted_index":{"We":[0,193],"revisit":[1],"a":[2,10,19,31,71,75,91,167],"problem":[3],"introduced":[4],"by":[5,90],"Bharat":[6,45],"and":[7,46,58,74,138,146,177,209],"Broder":[8,47],"almost":[9],"decade":[11],"ago:":[12],"how":[13],"to":[14,101,117,126,156,197],"sample":[15,83,87],"random":[16,76],"pages":[17],"from":[18,49,160],"search":[20,26,41,162],"engine's":[21,27,163],"index":[22],"using":[23],"only":[24],"the":[25,96,105,112,139,161,202],"public":[28],"interface?":[29],"Such":[30],"primitive":[32],"is":[33,88],"particularly":[34],"useful":[35],"in":[36,104,109],"creating":[37],"objective":[38],"benchmarks":[39],"for":[40],"engines.The":[42],"technique":[43,73],"of":[44,98,169,205],"suffers":[48],"two":[50,67],"well":[51,128],"recorded":[52],"biases:":[53],"it":[54],"favors":[55],"long":[56,188],"documents":[57,172],"highly":[59,190],"ranked":[60,191],"documents.":[61,192],"In":[62],"this":[63,99,122],"paper":[64],"we":[65,124],"introduce":[66],"novel":[68],"sampling":[69,137],"techniques:":[70],"lexicon-based":[72],"walk":[77],"technique.":[78],"Our":[79],"methods":[80,144],"produce":[81,157],"biased":[82],"documents,":[84],"but":[85],"each":[86],"accompanied":[89],"corresponding":[92],"\"weight\",":[93],"which":[94],"represents":[95],"probability":[97],"document":[100],"be":[102],"selected":[103],"sample.":[106],"The":[107],"samples,":[108],"conjunction":[110],"with":[111],"weights,":[113],"are":[114,154],"then":[115],"used":[116],"simulate":[118],"near-uniform":[119,158],"samples.":[120],"To":[121],"end,":[123],"resort":[125],"three":[127],"known":[129],"Monte":[130],"Carlo":[131],"simulation":[132],"methods:":[133],"rejection":[134],"sampling,":[135],"importance":[136],"Metropolis-Hastings":[140],"algorithm.We":[141],"analyze":[142],"our":[143,152,174,180,195],"rigorously":[145],"prove":[147],"that":[148,179],"under":[149],"plausible":[150],"assumptions,":[151],"techniques":[153],"guaranteed":[155],"samples":[159],"index.":[164],"Experiments":[165],"on":[166],"corpus":[168],"2.4":[170],"million":[171],"substantiate":[173],"analytical":[175],"findings":[176],"show":[178],"algorithms":[181,196],"do":[182],"not":[183],"have":[184],"significant":[185],"bias":[186],"towards":[187],"or":[189],"use":[194],"collect":[198],"fresh":[199],"data":[200],"about":[201],"relative":[203],"sizes":[204],"Google,":[206],"MSN":[207],"Search,":[208],"Yahoo!.":[210]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":4},{"year":2014,"cited_by_count":8},{"year":2013,"cited_by_count":5},{"year":2012,"cited_by_count":16}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
