{"id":"https://openalex.org/W2102815161","doi":"https://doi.org/10.1145/1247480.1247550","title":"A random walk approach to sampling hidden databases","display_name":"A random walk approach to sampling hidden databases","publication_year":2007,"publication_date":"2007-06-11","ids":{"openalex":"https://openalex.org/W2102815161","doi":"https://doi.org/10.1145/1247480.1247550","mag":"2102815161"},"language":"en","primary_location":{"id":"doi:10.1145/1247480.1247550","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1247480.1247550","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2007 ACM SIGMOD international conference on Management of data","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5057802863","display_name":"Arjun Dasgupta","orcid":null},"institutions":[{"id":"https://openalex.org/I189196454","display_name":"The University of Texas at Arlington","ror":"https://ror.org/019kgqr73","country_code":"US","type":"education","lineage":["https://openalex.org/I189196454"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Arjun Dasgupta","raw_affiliation_strings":["University of Texas at Arlington, Arlington, TX"],"affiliations":[{"raw_affiliation_string":"University of Texas at Arlington, Arlington, TX","institution_ids":["https://openalex.org/I189196454"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002203026","display_name":"Gautam Das","orcid":"https://orcid.org/0000-0002-4627-9065"},"institutions":[{"id":"https://openalex.org/I189196454","display_name":"The University of Texas at Arlington","ror":"https://ror.org/019kgqr73","country_code":"US","type":"education","lineage":["https://openalex.org/I189196454"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gautam Das","raw_affiliation_strings":["University of Texas at Arlington, Arlington, TX"],"affiliations":[{"raw_affiliation_string":"University of Texas at Arlington, Arlington, TX","institution_ids":["https://openalex.org/I189196454"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5013005096","display_name":"Heikki Mannila","orcid":null},"institutions":[{"id":"https://openalex.org/I133731052","display_name":"University of Helsinki","ror":"https://ror.org/040af2s02","country_code":"FI","type":"education","lineage":["https://openalex.org/I133731052"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Heikki Mannila","raw_affiliation_strings":["Helsinki University of Technology and University of Helsinki, Helsinki, Finland","Helsinki University of Technology and University of Helsinki, Helsinki, Finland#TAB#"],"affiliations":[{"raw_affiliation_string":"Helsinki University of Technology and University of Helsinki, Helsinki, Finland","institution_ids":["https://openalex.org/I133731052"]},{"raw_affiliation_string":"Helsinki University of Technology and University of Helsinki, Helsinki, Finland#TAB#","institution_ids":["https://openalex.org/I133731052"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5057802863"],"corresponding_institution_ids":["https://openalex.org/I189196454"],"apc_list":null,"apc_paid":null,"fwci":18.2229,"has_fulltext":false,"cited_by_count":90,"citation_normalized_percentile":{"value":0.98979121,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"629","last_page":"640"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9944999814033508,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9944999814033508,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7996320724487305},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.6125661134719849},{"id":"https://openalex.org/keywords/random-walk","display_name":"Random walk","score":0.5913736820220947},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5850923657417297},{"id":"https://openalex.org/keywords/interface","display_name":"Interface (matter)","score":0.550847589969635},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.5284901857376099},{"id":"https://openalex.org/keywords/probabilistic-database","display_name":"Probabilistic database","score":0.48727333545684814},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.47524285316467285},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.4573054611682892},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.43971163034439087},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.33914244174957275},{"id":"https://openalex.org/keywords/database-theory","display_name":"Database theory","score":0.2655615210533142},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.25138431787490845},{"id":"https://openalex.org/keywords/relational-database","display_name":"Relational database","score":0.2035052478313446},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1187683641910553},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.1066262423992157}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7996320724487305},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.6125661134719849},{"id":"https://openalex.org/C121194460","wikidata":"https://www.wikidata.org/wiki/Q856741","display_name":"Random walk","level":2,"score":0.5913736820220947},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5850923657417297},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.550847589969635},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.5284901857376099},{"id":"https://openalex.org/C174539288","wikidata":"https://www.wikidata.org/wiki/Q7246853","display_name":"Probabilistic database","level":4,"score":0.48727333545684814},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.47524285316467285},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.4573054611682892},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.43971163034439087},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.33914244174957275},{"id":"https://openalex.org/C12439846","wikidata":"https://www.wikidata.org/wiki/Q4809258","display_name":"Database theory","level":3,"score":0.2655615210533142},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.25138431787490845},{"id":"https://openalex.org/C5655090","wikidata":"https://www.wikidata.org/wiki/Q192588","display_name":"Relational database","level":2,"score":0.2035052478313446},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1187683641910553},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.1066262423992157},{"id":"https://openalex.org/C129307140","wikidata":"https://www.wikidata.org/wiki/Q6795880","display_name":"Maximum bubble pressure method","level":3,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C157915830","wikidata":"https://www.wikidata.org/wiki/Q2928001","display_name":"Bubble","level":2,"score":0.0},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1145/1247480.1247550","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1247480.1247550","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2007 ACM SIGMOD international conference on Management of data","raw_type":"proceedings-article"},{"id":"pmh:oai:uta-ir.tdl.org:10106/96","is_oa":false,"landing_page_url":"https://www.uta.edu/ra/real/editprofile.php?onlyview=1&amp;pid=178","pdf_url":null,"source":{"id":"https://openalex.org/S4306400392","display_name":"UTA ResearchCommons (University of Texas Arlington)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I189196454","host_organization_name":"The University of Texas at Arlington","host_organization_lineage":["https://openalex.org/I189196454"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"M.S."},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.135.2940","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.135.2940","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://dspace.uta.edu/bitstream/10106/96/1/umi-uta-1678.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.95.2195","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.95.2195","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://arjundasgupta.com/Documents/Thesis.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W1586825695","https://openalex.org/W1605217017","https://openalex.org/W1659541576","https://openalex.org/W2026896584","https://openalex.org/W2041912938","https://openalex.org/W2056760934","https://openalex.org/W2119885577","https://openalex.org/W2128941908","https://openalex.org/W2137845970","https://openalex.org/W2138309709","https://openalex.org/W2147164982","https://openalex.org/W2151065878","https://openalex.org/W2170188121","https://openalex.org/W2221553715","https://openalex.org/W4247346926","https://openalex.org/W6681574511","https://openalex.org/W6685116542"],"related_works":["https://openalex.org/W1774414873","https://openalex.org/W2000084727","https://openalex.org/W2744606804","https://openalex.org/W2103934874","https://openalex.org/W4238495367","https://openalex.org/W2125434953","https://openalex.org/W2749065928","https://openalex.org/W2128160835","https://openalex.org/W4288558710","https://openalex.org/W2952920395"],"abstract_inverted_index":{"A":[0],"large":[1],"part":[2],"of":[3,35,90,119],"the":[4,7,42,50,64,69,79,99,115],"data":[5,52],"on":[6],"World":[8],"Wide":[9],"Web":[10],"is":[11,82],"hidden":[12,21,37],"behind":[13],"form-like":[14],"interfaces.":[15],"These":[16],"interfaces":[17],"interact":[18],"with":[19],"a":[20,31,59,85,104],"back-end":[22],"database":[23,38],"to":[24,27,49,71,96,113],"provide":[25],"answers":[26],"user":[28],"queries.":[29],"Generating":[30],"uniform":[32],"random":[33,60,88],"sample":[34,72,100],"this":[36,55],"by":[39,68,102],"using":[40,103],"only":[41],"publicly":[43],"available":[44],"interface":[45,70],"gives":[46],"us":[47],"access":[48],"underlying":[51],"distribution.":[53],"In":[54],"paper,":[56],"we":[57],"propose":[58,94],"walk":[61],"scheme":[62],"over":[63],"query":[65,80],"space":[66,81],"provided":[67],"such":[73],"databases.":[74],"We":[75,92,109],"discuss":[76],"variants":[77],"where":[78],"visualized":[83],"as":[84],"fixed":[86],"and":[87,117],"ordering":[89],"attributes.":[91],"also":[93],"techniques":[95],"further":[97],"improve":[98],"quality":[101],"probabilistic":[105],"rejection":[106],"based":[107],"approach.":[108],"conduct":[110],"extensive":[111],"experiments":[112],"illustrate":[114],"accuracy":[116],"efficiency":[118],"our":[120],"techniques.":[121]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":4},{"year":2016,"cited_by_count":4},{"year":2015,"cited_by_count":9},{"year":2014,"cited_by_count":9},{"year":2013,"cited_by_count":8},{"year":2012,"cited_by_count":10}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
