{"id":"https://openalex.org/W4313371817","doi":"https://doi.org/10.1145/2949741.2949756","title":"DeepDive","display_name":"DeepDive","publication_year":2016,"publication_date":"2016-06-02","ids":{"openalex":"https://openalex.org/W4313371817","doi":"https://doi.org/10.1145/2949741.2949756"},"language":"en","primary_location":{"id":"doi:10.1145/2949741.2949756","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2949741.2949756","pdf_url":null,"source":{"id":"https://openalex.org/S47508943","display_name":"ACM SIGMOD Record","issn_l":"0163-5808","issn":["0163-5808","1943-5835"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM SIGMOD Record","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041869459","display_name":"Christopher De","orcid":"https://orcid.org/0000-0002-3610-2696"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Christopher De Sa","raw_affiliation_strings":["Stanford University"],"affiliations":[{"raw_affiliation_string":"Stanford University","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067607533","display_name":"Alex Ratner","orcid":null},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alex Ratner","raw_affiliation_strings":["Stanford University"],"affiliations":[{"raw_affiliation_string":"Stanford University","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109519525","display_name":"Christopher R\u00e9","orcid":null},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christopher R\u00e9","raw_affiliation_strings":["Stanford University"],"affiliations":[{"raw_affiliation_string":"Stanford University","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101727253","display_name":"Jaeho Shin","orcid":"https://orcid.org/0000-0001-5280-3356"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jaeho Shin","raw_affiliation_strings":["Stanford University"],"affiliations":[{"raw_affiliation_string":"Stanford University","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100778976","display_name":"Feiran Wang","orcid":"https://orcid.org/0000-0003-4867-7400"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Feiran Wang","raw_affiliation_strings":["Stanford University"],"affiliations":[{"raw_affiliation_string":"Stanford University","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033975637","display_name":"Sen Wu","orcid":"https://orcid.org/0000-0002-6133-4122"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sen Wu","raw_affiliation_strings":["Stanford University"],"affiliations":[{"raw_affiliation_string":"Stanford University","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100383731","display_name":"Ce Zhang","orcid":"https://orcid.org/0000-0002-8105-7505"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ce Zhang","raw_affiliation_strings":["Stanford University"],"affiliations":[{"raw_affiliation_string":"Stanford University","institution_ids":["https://openalex.org/I97018004"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5041869459"],"corresponding_institution_ids":["https://openalex.org/I97018004"],"apc_list":null,"apc_paid":null,"fwci":7.8992,"has_fulltext":false,"cited_by_count":61,"citation_normalized_percentile":{"value":0.97463326,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"45","issue":"1","first_page":"60","last_page":"67"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9075638055801392},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.6909204721450806},{"id":"https://openalex.org/keywords/sql","display_name":"SQL","score":0.609372079372406},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5529747009277344},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5512186884880066},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.4414832293987274},{"id":"https://openalex.org/keywords/data-integration","display_name":"Data integration","score":0.43736886978149414},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.43519020080566406},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.42636755108833313},{"id":"https://openalex.org/keywords/inference-engine","display_name":"Inference engine","score":0.41829657554626465},{"id":"https://openalex.org/keywords/knowledge-base","display_name":"Knowledge base","score":0.41550740599632263},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.3623940944671631},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.349701464176178},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3219382166862488}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9075638055801392},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.6909204721450806},{"id":"https://openalex.org/C510870499","wikidata":"https://www.wikidata.org/wiki/Q47607","display_name":"SQL","level":2,"score":0.609372079372406},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5529747009277344},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5512186884880066},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.4414832293987274},{"id":"https://openalex.org/C72634772","wikidata":"https://www.wikidata.org/wiki/Q386824","display_name":"Data integration","level":2,"score":0.43736886978149414},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.43519020080566406},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.42636755108833313},{"id":"https://openalex.org/C46743427","wikidata":"https://www.wikidata.org/wiki/Q1341685","display_name":"Inference engine","level":3,"score":0.41829657554626465},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.41550740599632263},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3623940944671631},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.349701464176178},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3219382166862488},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2949741.2949756","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2949741.2949756","pdf_url":null,"source":{"id":"https://openalex.org/S47508943","display_name":"ACM SIGMOD Record","issn_l":"0163-5808","issn":["0163-5808","1943-5835"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM SIGMOD Record","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.6399999856948853}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W1435924991","https://openalex.org/W1565102206","https://openalex.org/W1788418780","https://openalex.org/W1934084512","https://openalex.org/W1962705364","https://openalex.org/W1965685479","https://openalex.org/W2006149654","https://openalex.org/W2045495924","https://openalex.org/W2052569640","https://openalex.org/W2068737686","https://openalex.org/W2079299255","https://openalex.org/W2081210343","https://openalex.org/W2091125641","https://openalex.org/W2093808275","https://openalex.org/W2100007248","https://openalex.org/W2101108755","https://openalex.org/W2110367654","https://openalex.org/W2115461474","https://openalex.org/W2118038484","https://openalex.org/W2129207602","https://openalex.org/W2129629757","https://openalex.org/W2133134975","https://openalex.org/W2135209143","https://openalex.org/W2135912864","https://openalex.org/W2138204945","https://openalex.org/W2144810465","https://openalex.org/W2167571757","https://openalex.org/W4230201548","https://openalex.org/W4293052541","https://openalex.org/W4299094297","https://openalex.org/W4299551239","https://openalex.org/W6635684370","https://openalex.org/W6680029872","https://openalex.org/W6684249991","https://openalex.org/W6712028060","https://openalex.org/W6841455387"],"related_works":["https://openalex.org/W2395929705","https://openalex.org/W4375958074","https://openalex.org/W2372304001","https://openalex.org/W2376097826","https://openalex.org/W1498088200","https://openalex.org/W2991054756","https://openalex.org/W2395092600","https://openalex.org/W2382052616","https://openalex.org/W2359127710","https://openalex.org/W1996884437"],"abstract_inverted_index":{"The":[0,63],"dark":[1],"data":[2,20,41,81],"extraction":[3],"or":[4,113],"knowledge":[5],"base":[6],"construction":[7,148],"(KBC)":[8],"problem":[9,32],"is":[10,29,68,125],"to":[11,58,78,126,130,146],"populate":[12],"a":[13,30,49,89],"SQL":[14],"database":[15,53],"with":[16],"information":[17],"from":[18],"unstructured":[19],"sources":[21],"including":[22],"emails,":[23],"webpages,":[24],"and":[25,35,44,54,72,86,91,141],"pdf":[26],"reports.":[27],"KBC":[28,61,134,150],"long-standing":[31],"in":[33,66,83,88,99],"industry":[34],"research":[36],"that":[37,51,69,100],"encompasses":[38],"problems":[39,82],"of":[40,143,149],"extraction,":[42,84],"cleaning,":[43,85],"integration.":[45],"We":[46,136],"describe":[47],"DeepDive,":[48],"system":[50],"combines":[52],"machine":[55,73],"learning":[56,74],"ideas":[57],"help":[59],"develop":[60],"systems.":[62,135,151],"key":[64,76,119],"idea":[65],"DeepDive":[67,95,144],"statistical":[70],"inference":[71,105],"are":[75,97],"tools":[77],"attack":[79],"classical":[80],"integration":[87],"unified":[90],"more":[92],"effective":[93],"manner.":[94],"programs":[96],"declarative":[98],"one":[101,108],"cannot":[102],"write":[103],"probabilistic":[104],"algorithms;":[106],"instead,":[107],"interacts":[109],"by":[110],"defining":[111],"features":[112],"rules":[114],"about":[115],"the":[116,138],"domain.":[117],"A":[118],"reason":[120],"for":[121],"this":[122],"design":[123],"choice":[124],"enable":[127],"domain":[128],"experts":[129],"build":[131],"their":[132],"own":[133],"present":[137],"applications,":[139],"abstractions,":[140],"techniques":[142],"employed":[145],"accelerate":[147]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":8},{"year":2021,"cited_by_count":8},{"year":2020,"cited_by_count":15},{"year":2019,"cited_by_count":7},{"year":2018,"cited_by_count":9},{"year":2017,"cited_by_count":5},{"year":2016,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2023-01-06T00:00:00"}
