{"id":"https://openalex.org/W2949047719","doi":"https://doi.org/10.1145/3299869.3319899","title":"Progressive Deep Web Crawling Through Keyword Queries For Data Enrichment","display_name":"Progressive Deep Web Crawling Through Keyword Queries For Data Enrichment","publication_year":2019,"publication_date":"2019-06-18","ids":{"openalex":"https://openalex.org/W2949047719","doi":"https://doi.org/10.1145/3299869.3319899","mag":"2949047719"},"language":"en","primary_location":{"id":"doi:10.1145/3299869.3319899","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3299869.3319899","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2019 International Conference on Management of Data","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5115593415","display_name":"Pei Wang","orcid":"https://orcid.org/0009-0007-3968-6918"},"institutions":[{"id":"https://openalex.org/I18014758","display_name":"Simon Fraser University","ror":"https://ror.org/0213rcc28","country_code":"CA","type":"education","lineage":["https://openalex.org/I18014758"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Pei Wang","raw_affiliation_strings":["Simon Fraser University, Burnaby, BC, Canada"],"affiliations":[{"raw_affiliation_string":"Simon Fraser University, Burnaby, BC, Canada","institution_ids":["https://openalex.org/I18014758"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051613770","display_name":"Ryan Shea","orcid":"https://orcid.org/0000-0002-8213-2036"},"institutions":[{"id":"https://openalex.org/I18014758","display_name":"Simon Fraser University","ror":"https://ror.org/0213rcc28","country_code":"CA","type":"education","lineage":["https://openalex.org/I18014758"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Ryan Shea","raw_affiliation_strings":["Simon Fraser University, Burnaby, BC, Canada"],"affiliations":[{"raw_affiliation_string":"Simon Fraser University, Burnaby, BC, Canada","institution_ids":["https://openalex.org/I18014758"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101850961","display_name":"Jiannan Wang","orcid":"https://orcid.org/0009-0002-8978-312X"},"institutions":[{"id":"https://openalex.org/I18014758","display_name":"Simon Fraser University","ror":"https://ror.org/0213rcc28","country_code":"CA","type":"education","lineage":["https://openalex.org/I18014758"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Jiannan Wang","raw_affiliation_strings":["Simon Fraser University, Burnaby, BC, Canada"],"affiliations":[{"raw_affiliation_string":"Simon Fraser University, Burnaby, BC, Canada","institution_ids":["https://openalex.org/I18014758"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049016095","display_name":"Eugene Wu","orcid":"https://orcid.org/0000-0003-4254-6688"},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Eugene Wu","raw_affiliation_strings":["Columbia University, New York City, NY, USA"],"affiliations":[{"raw_affiliation_string":"Columbia University, New York City, NY, USA","institution_ids":["https://openalex.org/I78577930"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5115593415"],"corresponding_institution_ids":["https://openalex.org/I18014758"],"apc_list":null,"apc_paid":null,"fwci":1.0174,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.82470017,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"229","last_page":"246"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9902999997138977,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9879999756813049,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8293834328651428},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.6056006550788879},{"id":"https://openalex.org/keywords/web-search-query","display_name":"Web search query","score":0.5761539936065674},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.56972736120224},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.534233570098877},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5318583846092224},{"id":"https://openalex.org/keywords/web-query-classification","display_name":"Web query classification","score":0.503755509853363},{"id":"https://openalex.org/keywords/spatial-query","display_name":"Spatial query","score":0.46550968289375305},{"id":"https://openalex.org/keywords/query-optimization","display_name":"Query optimization","score":0.45588386058807373},{"id":"https://openalex.org/keywords/crawling","display_name":"Crawling","score":0.43899115920066833},{"id":"https://openalex.org/keywords/view","display_name":"View","score":0.4323809742927551},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.41376280784606934},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4023439586162567},{"id":"https://openalex.org/keywords/database-design","display_name":"Database design","score":0.22075212001800537},{"id":"https://openalex.org/keywords/search-engine","display_name":"Search engine","score":0.177141934633255},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08144789934158325}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8293834328651428},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.6056006550788879},{"id":"https://openalex.org/C164120249","wikidata":"https://www.wikidata.org/wiki/Q995982","display_name":"Web search query","level":3,"score":0.5761539936065674},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.56972736120224},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.534233570098877},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5318583846092224},{"id":"https://openalex.org/C118689300","wikidata":"https://www.wikidata.org/wiki/Q7978614","display_name":"Web query classification","level":4,"score":0.503755509853363},{"id":"https://openalex.org/C172722865","wikidata":"https://www.wikidata.org/wiki/Q2302053","display_name":"Spatial query","level":5,"score":0.46550968289375305},{"id":"https://openalex.org/C157692150","wikidata":"https://www.wikidata.org/wiki/Q2919848","display_name":"Query optimization","level":2,"score":0.45588386058807373},{"id":"https://openalex.org/C100368936","wikidata":"https://www.wikidata.org/wiki/Q1411725","display_name":"Crawling","level":2,"score":0.43899115920066833},{"id":"https://openalex.org/C54239708","wikidata":"https://www.wikidata.org/wiki/Q1329910","display_name":"View","level":3,"score":0.4323809742927551},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.41376280784606934},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4023439586162567},{"id":"https://openalex.org/C148840519","wikidata":"https://www.wikidata.org/wiki/Q1049878","display_name":"Database design","level":2,"score":0.22075212001800537},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.177141934633255},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08144789934158325},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C105702510","wikidata":"https://www.wikidata.org/wiki/Q514","display_name":"Anatomy","level":1,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3299869.3319899","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3299869.3319899","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2019 International Conference on Management of Data","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/17","display_name":"Partnerships for the goals","score":0.47999998927116394}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W291262356","https://openalex.org/W1586176254","https://openalex.org/W1680189815","https://openalex.org/W1721994796","https://openalex.org/W1967932772","https://openalex.org/W1969621019","https://openalex.org/W1981217233","https://openalex.org/W1989361423","https://openalex.org/W1989856433","https://openalex.org/W1996505782","https://openalex.org/W2016753842","https://openalex.org/W2018816493","https://openalex.org/W2028574166","https://openalex.org/W2031250218","https://openalex.org/W2035168844","https://openalex.org/W2035274534","https://openalex.org/W2041771431","https://openalex.org/W2043499927","https://openalex.org/W2064853889","https://openalex.org/W2066806792","https://openalex.org/W2081948558","https://openalex.org/W2083293881","https://openalex.org/W2085433073","https://openalex.org/W2102815161","https://openalex.org/W2104042955","https://openalex.org/W2104987630","https://openalex.org/W2115022330","https://openalex.org/W2115457429","https://openalex.org/W2117058208","https://openalex.org/W2129817180","https://openalex.org/W2131006463","https://openalex.org/W2140116426","https://openalex.org/W2143079043","https://openalex.org/W2148738951","https://openalex.org/W2166983036","https://openalex.org/W2170188121","https://openalex.org/W2170971772","https://openalex.org/W2173431721","https://openalex.org/W2188138540","https://openalex.org/W2544486974","https://openalex.org/W2740592503","https://openalex.org/W2798520647","https://openalex.org/W3101556001","https://openalex.org/W3118655244","https://openalex.org/W4252403066","https://openalex.org/W6635193281","https://openalex.org/W6685116542","https://openalex.org/W6902931378"],"related_works":["https://openalex.org/W2186703450","https://openalex.org/W227397245","https://openalex.org/W1533255077","https://openalex.org/W175663584","https://openalex.org/W2584018254","https://openalex.org/W2200784651","https://openalex.org/W2148237846","https://openalex.org/W4381740310","https://openalex.org/W2006271460","https://openalex.org/W2101552389"],"abstract_inverted_index":{"Data":[0],"enrichment":[1],"is":[2,50,145,169,185],"the":[3,28,58,63,68,107,122,127,130,135,153,174,178,191,198,208,213,218,226,230,260,266],"act":[4],"of":[5,70,116,129,138,155,193,200,229],"extending":[6],"a":[7,22,32,36,41,77,85,95,102,114,163],"local":[8,42,108,139,156,179,201,261],"database":[9,43,124,262],"with":[10],"new":[11,86],"attributes":[12],"from":[13],"external":[14],"data":[15,59],"sources.":[16],"In":[17,80],"this":[18,184],"paper,":[19],"we":[20,82],"study":[21,222],"novel":[23],"problem-how":[24],"to":[25,39,88,121,147,170,189,224,239,265],"progressively":[26],"crawl":[27],"deep":[29],"web":[30],"(i.e.,":[31],"hidden":[33,123,209,219,252],"database)":[34],"through":[35],"keyword-search":[37],"API":[38],"enrich":[40],"in":[44,177,207],"an":[45],"e":[46,92,235],"ective":[47,187,236],"way.":[48],"This":[49],"chal-":[51],"lenging":[52],"because":[53],"these":[54],"interfaces":[55],"often":[56],"limit":[57],"access":[60],"by":[61,162,217],"enforcing":[62],"top-k":[64,214],"constraint":[65,215],"or":[66],"limiting":[67],"number":[69,137,154,199],"queries":[71,120],"that":[72,126,158,183,203,246],"can":[73,133,159],"be":[74,160,205],"issued":[75],"within":[76],"time":[78],"window.":[79],"response,":[81],"propose":[83,234],"SmartCrawl,":[84],"framework":[87],"collect":[89],"re-":[90],"sults":[91],"ectively.":[93],"Given":[94],"query":[96,103,131,149,175],"budget":[97],"b,":[98],"SmartCrawl":[99,254],"rst":[100],"constructs":[101],"pool":[104],"based":[105],"on":[106,247],"database,":[109,210],"and":[110,211,233,250],"then":[111],"iteratively":[112],"issues":[113],"set":[115],"most":[117],"bene":[118,150],"cial":[119],"such":[125],"union":[128],"results":[132,244],"cover":[134],"maximum":[136],"records.":[140],"The":[141,242],"key":[142],"technical":[143],"challenge":[144],"how":[146,223],"estimate":[148,171],"t,":[151],"i.e.,":[152],"records":[157,202],"covered":[161],"given":[164],"query.":[165],"A":[166],"simple":[167],"approach":[168],"it":[172],"as":[173,263],"frequency":[176],"database.":[180,220],"We":[181,221],"nd":[182],"ine":[186],"due":[188],"i)":[190],"impact":[192],"|\u0394D|,":[194],"where":[195],"|\u0394D|":[196],"represents":[197],"cannot":[204],"found":[206],"ii)":[212],"enforced":[216],"mitigate":[225],"negative":[227],"impacts":[228],"two":[231],"factors":[232],"optimization":[237],"techniques":[238],"improve":[240],"performance.":[241],"experimental":[243],"show":[245],"both":[248],"simulated":[249],"real-world":[251],"databases,":[253],"signi":[255],"cantly":[256],"increases":[257],"coverage":[258],"over":[259],"compared":[264],"baselines.":[267]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2018,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
