{"id":"https://openalex.org/W2608353553","doi":"https://doi.org/10.1145/3060586","title":"DeepDive","display_name":"DeepDive","publication_year":2017,"publication_date":"2017-04-24","ids":{"openalex":"https://openalex.org/W2608353553","doi":"https://doi.org/10.1145/3060586","mag":"2608353553"},"language":"en","primary_location":{"id":"doi:10.1145/3060586","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3060586","pdf_url":null,"source":{"id":"https://openalex.org/S103482838","display_name":"Communications of the ACM","issn_l":"0001-0782","issn":["0001-0782","1557-7317"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Communications of the ACM","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100383731","display_name":"Ce Zhang","orcid":"https://orcid.org/0000-0002-8105-7505"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":true,"raw_author_name":"Ce Zhang","raw_affiliation_strings":["ETH Zurich, Zurich, Switzerland"],"affiliations":[{"raw_affiliation_string":"ETH Zurich, Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103852640","display_name":"Christopher R\u00e9","orcid":null},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christopher R\u00e9","raw_affiliation_strings":["Stanford University, Stanford, CA"],"affiliations":[{"raw_affiliation_string":"Stanford University, Stanford, CA","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039133265","display_name":"Michael Cafarella","orcid":"https://orcid.org/0000-0001-6122-0590"},"institutions":[{"id":"https://openalex.org/I116921496","display_name":"Lattice Semiconductor (United States)","ror":"https://ror.org/01hght844","country_code":"US","type":"company","lineage":["https://openalex.org/I116921496"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michael Cafarella","raw_affiliation_strings":["Lattice Data, Inc., Palo Alto, CA"],"affiliations":[{"raw_affiliation_string":"Lattice Data, Inc., Palo Alto, CA","institution_ids":["https://openalex.org/I116921496"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041869459","display_name":"Christopher De","orcid":"https://orcid.org/0000-0002-3610-2696"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christopher De Sa","raw_affiliation_strings":["Stanford University, Stanford, CA"],"affiliations":[{"raw_affiliation_string":"Stanford University, Stanford, CA","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067607533","display_name":"Alex Ratner","orcid":null},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alex Ratner","raw_affiliation_strings":["Stanford University, Stanford, CA"],"affiliations":[{"raw_affiliation_string":"Stanford University, Stanford, CA","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101727254","display_name":"Jaeho Shin","orcid":"https://orcid.org/0000-0002-7739-7195"},"institutions":[{"id":"https://openalex.org/I116921496","display_name":"Lattice Semiconductor (United States)","ror":"https://ror.org/01hght844","country_code":"US","type":"company","lineage":["https://openalex.org/I116921496"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jaeho Shin","raw_affiliation_strings":["Lattice Data, Inc., Palo Alto, CA"],"affiliations":[{"raw_affiliation_string":"Lattice Data, Inc., Palo Alto, CA","institution_ids":["https://openalex.org/I116921496"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100778973","display_name":"Feiran Wang","orcid":"https://orcid.org/0000-0002-6601-9172"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Feiran Wang","raw_affiliation_strings":["Stanford University, Stanford, CA"],"affiliations":[{"raw_affiliation_string":"Stanford University, Stanford, CA","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033975637","display_name":"Sen Wu","orcid":"https://orcid.org/0000-0002-6133-4122"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sen Wu","raw_affiliation_strings":["Stanford University, Stanford, CA"],"affiliations":[{"raw_affiliation_string":"Stanford University, Stanford, CA","institution_ids":["https://openalex.org/I97018004"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5100383731"],"corresponding_institution_ids":["https://openalex.org/I35440088"],"apc_list":null,"apc_paid":null,"fwci":5.2655,"has_fulltext":false,"cited_by_count":59,"citation_normalized_percentile":{"value":0.9637045,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"60","issue":"5","first_page":"93","last_page":"102"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12357","display_name":"Digital Media Forensic Detection","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9894999861717224,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.864531397819519},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5821533203125},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5046569108963013},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4818383455276489},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.47399961948394775},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4636702537536621},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.46224167943000793},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.46041637659072876},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.45101502537727356},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4111974835395813},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.36934685707092285}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.864531397819519},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5821533203125},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5046569108963013},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4818383455276489},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.47399961948394775},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4636702537536621},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.46224167943000793},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.46041637659072876},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.45101502537727356},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4111974835395813},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.36934685707092285},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3060586","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3060586","pdf_url":null,"source":{"id":"https://openalex.org/S103482838","display_name":"Communications of the ACM","issn_l":"0001-0782","issn":["0001-0782","1557-7317"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Communications of the ACM","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5699999928474426,"id":"https://metadata.un.org/sdg/8","display_name":"Decent work and economic growth"}],"awards":[{"id":"https://openalex.org/G3291827993","display_name":null,"funder_award_id":"XDATA (FA8750-12-2-0335), DEFT (FA8750-13-2-0039), MEMEX, SIMPLEX","funder_id":"https://openalex.org/F4320332180","funder_display_name":"Defense Advanced Research Projects Agency"},{"id":"https://openalex.org/G3858477398","display_name":null,"funder_award_id":"IIS-1353606","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4213346864","display_name":null,"funder_award_id":"U54EB020405","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G5014522687","display_name":null,"funder_award_id":"N000141210041, N000141310129","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320306151","display_name":"Alfred P. Sloan Foundation","ror":"https://ror.org/052csg198"},{"id":"https://openalex.org/F4320306202","display_name":"Gordon and Betty Moore Foundation","ror":"https://ror.org/006wxqw41"},{"id":"https://openalex.org/F4320309327","display_name":"Google","ror":"https://ror.org/00njsd438"},{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"},{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"},{"id":"https://openalex.org/F4320337345","display_name":"Office of Naval Research","ror":"https://ror.org/00rk2pe57"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":53,"referenced_works":["https://openalex.org/W343945789","https://openalex.org/W1489949474","https://openalex.org/W1493490255","https://openalex.org/W1509562192","https://openalex.org/W1512387364","https://openalex.org/W1565102206","https://openalex.org/W1599188306","https://openalex.org/W1788418780","https://openalex.org/W1934084512","https://openalex.org/W1962705364","https://openalex.org/W1965685479","https://openalex.org/W1997945384","https://openalex.org/W2006149654","https://openalex.org/W2009591769","https://openalex.org/W2012670464","https://openalex.org/W2045495924","https://openalex.org/W2052569640","https://openalex.org/W2068737686","https://openalex.org/W2075694579","https://openalex.org/W2079299255","https://openalex.org/W2081210343","https://openalex.org/W2091125641","https://openalex.org/W2093808275","https://openalex.org/W2098679902","https://openalex.org/W2100007248","https://openalex.org/W2101108755","https://openalex.org/W2107598941","https://openalex.org/W2110367654","https://openalex.org/W2115461474","https://openalex.org/W2118038484","https://openalex.org/W2120340025","https://openalex.org/W2129207602","https://openalex.org/W2129629757","https://openalex.org/W2132679783","https://openalex.org/W2133134975","https://openalex.org/W2135209143","https://openalex.org/W2135912864","https://openalex.org/W2138204945","https://openalex.org/W2138243089","https://openalex.org/W2144416276","https://openalex.org/W2144810465","https://openalex.org/W2166706236","https://openalex.org/W2167571757","https://openalex.org/W2171278097","https://openalex.org/W2184860929","https://openalex.org/W2251812060","https://openalex.org/W2396924315","https://openalex.org/W2430018012","https://openalex.org/W2739934489","https://openalex.org/W2964244261","https://openalex.org/W4230201548","https://openalex.org/W4299094297","https://openalex.org/W4299551239"],"related_works":["https://openalex.org/W2055243143","https://openalex.org/W2943623134","https://openalex.org/W2494523064","https://openalex.org/W2215759665","https://openalex.org/W2030292806","https://openalex.org/W2960358116","https://openalex.org/W4287727129","https://openalex.org/W3041172967","https://openalex.org/W2163814182","https://openalex.org/W2368237856"],"abstract_inverted_index":{"The":[0,64],"dark":[1,171],"data":[2,20,41,76,172],"extraction":[3,107,173],"or":[4,129],"knowledge":[5],"base":[6],"construction":[7,168],"(KBC)":[8],"problem":[9,32],"is":[10,29,69,87],"to":[11,58,60,70,113,139,165],"populate":[12],"a":[13,30,49,80],"relational":[14],"database":[15,53],"with":[16,124],"information":[17],"from":[18],"unstructured":[19],"sources,":[21],"such":[22,141,170],"as":[23,79,142],"emails,":[24],"webpages,":[25],"and":[26,35,44,54,97,102,145,160],"PDFs.":[27],"KBC":[28,62],"long-standing":[31],"in":[33,67,163],"industry":[34],"research":[36],"that":[37,51,86],"encompasses":[38],"problems":[39,78],"of":[40,99,169],"extraction,":[42],"cleaning,":[43],"integration.":[45],"We":[46,155],"describe":[47],"DeepDive,":[48],"system":[50],"combines":[52],"machine":[55,103],"learning":[56,104],"ideas":[57],"help":[59],"develop":[61],"systems.":[63,174],"key":[65],"idea":[66],"DeepDive":[68,93,125,134,164],"frame":[71],"traditional":[72],"extract-transform-load":[73],"(ETL)":[74],"style":[75],"management":[77],"single":[81],"large":[82],"statistical":[83,100],"inference":[84,101,118],"task":[85],"declaratively":[88],"defined":[89],"by":[90,126],"the":[91,95,132,157,167],"user.":[92],"leverages":[94],"effectiveness":[96],"efficiency":[98],"for":[105],"difficult":[106],"tasks,":[108],"whereas":[109],"not":[110],"requiring":[111],"users":[112],"directly":[114],"write":[115],"any":[116],"probabilistic":[117],"algorithms.":[119],"Instead,":[120],"domain":[121],"experts":[122],"interact":[123],"defining":[127],"features":[128],"rules":[130],"about":[131],"domain.":[133],"has":[135],"been":[136],"successfully":[137],"applied":[138],"domains":[140],"pharmacogenomics,":[143],"paleobiology,":[144],"antihuman":[146],"trafficking":[147],"enforcement,":[148],"achieving":[149],"human-caliber":[150],"quality":[151],"at":[152],"machine-caliber":[153],"scale.":[154],"present":[156],"applications,":[158],"abstractions,":[159],"techniques":[161],"used":[162],"accelerate":[166]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":9},{"year":2021,"cited_by_count":9},{"year":2020,"cited_by_count":9},{"year":2019,"cited_by_count":7},{"year":2018,"cited_by_count":9},{"year":2017,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2017-05-05T00:00:00"}
