{"id":"https://openalex.org/W2113644918","doi":"https://doi.org/10.1145/1935826.1935885","title":"Materializing multi-relational databases from the web using taxonomic queries","display_name":"Materializing multi-relational databases from the web using taxonomic queries","publication_year":2011,"publication_date":"2011-02-01","ids":{"openalex":"https://openalex.org/W2113644918","doi":"https://doi.org/10.1145/1935826.1935885","mag":"2113644918"},"language":"en","primary_location":{"id":"doi:10.1145/1935826.1935885","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1935826.1935885","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the fourth ACM international conference on Web search and data mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5039421197","display_name":"Matthew Michelson","orcid":"https://orcid.org/0000-0003-3346-2132"},"institutions":[{"id":"https://openalex.org/I2801840469","display_name":"Torch Technologies (United States)","ror":"https://ror.org/00nrhr905","country_code":"US","type":"company","lineage":["https://openalex.org/I2801840469"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Matthew Michelson","raw_affiliation_strings":["Fetch Technologies, El Segundo, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Fetch Technologies, El Segundo, CA, USA","institution_ids":["https://openalex.org/I2801840469"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047798029","display_name":"Sofus A. Macskassy","orcid":null},"institutions":[{"id":"https://openalex.org/I2801840469","display_name":"Torch Technologies (United States)","ror":"https://ror.org/00nrhr905","country_code":"US","type":"company","lineage":["https://openalex.org/I2801840469"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sofus A. Macskassy","raw_affiliation_strings":["Fetch Technologies, El Segundo, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Fetch Technologies, El Segundo, CA, USA","institution_ids":["https://openalex.org/I2801840469"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045363082","display_name":"Steven Minton","orcid":"https://orcid.org/0000-0001-8640-2905"},"institutions":[{"id":"https://openalex.org/I2801840469","display_name":"Torch Technologies (United States)","ror":"https://ror.org/00nrhr905","country_code":"US","type":"company","lineage":["https://openalex.org/I2801840469"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Steven N. Minton","raw_affiliation_strings":["Fetch Technologies, El Segundo, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Fetch Technologies, El Segundo, CA, USA","institution_ids":["https://openalex.org/I2801840469"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5086169451","display_name":"Lise Getoor","orcid":null},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lise Getoor","raw_affiliation_strings":["University of Maryland, College Park, College Park, MD, USA","University of Maryland, College Park, College Park, MD, USA;"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Maryland, College Park, College Park, MD, USA","institution_ids":["https://openalex.org/I66946132"]},{"raw_affiliation_string":"University of Maryland, College Park, College Park, MD, USA;","institution_ids":["https://openalex.org/I66946132"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.7641,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.83854407,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"355","last_page":"364"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.986299991607666,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.98580002784729,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8085079193115234},{"id":"https://openalex.org/keywords/joins","display_name":"Joins","score":0.7408437728881836},{"id":"https://openalex.org/keywords/tuple","display_name":"Tuple","score":0.6738094687461853},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6010391116142273},{"id":"https://openalex.org/keywords/relationship-extraction","display_name":"Relationship extraction","score":0.5859472751617432},{"id":"https://openalex.org/keywords/relational-database","display_name":"Relational database","score":0.5702376365661621},{"id":"https://openalex.org/keywords/table","display_name":"Table (database)","score":0.5253557562828064},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.521623969078064},{"id":"https://openalex.org/keywords/column","display_name":"Column (typography)","score":0.5116820335388184},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.44213634729385376},{"id":"https://openalex.org/keywords/relation","display_name":"Relation (database)","score":0.4272300899028778},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.414972722530365},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.37233293056488037},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.3709806203842163},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11937013268470764}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8085079193115234},{"id":"https://openalex.org/C2778692605","wikidata":"https://www.wikidata.org/wiki/Q4041866","display_name":"Joins","level":2,"score":0.7408437728881836},{"id":"https://openalex.org/C118930307","wikidata":"https://www.wikidata.org/wiki/Q600590","display_name":"Tuple","level":2,"score":0.6738094687461853},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6010391116142273},{"id":"https://openalex.org/C153604712","wikidata":"https://www.wikidata.org/wiki/Q7310755","display_name":"Relationship extraction","level":3,"score":0.5859472751617432},{"id":"https://openalex.org/C5655090","wikidata":"https://www.wikidata.org/wiki/Q192588","display_name":"Relational database","level":2,"score":0.5702376365661621},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.5253557562828064},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.521623969078064},{"id":"https://openalex.org/C2780551164","wikidata":"https://www.wikidata.org/wiki/Q2306599","display_name":"Column (typography)","level":3,"score":0.5116820335388184},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.44213634729385376},{"id":"https://openalex.org/C25343380","wikidata":"https://www.wikidata.org/wiki/Q277521","display_name":"Relation (database)","level":2,"score":0.4272300899028778},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.414972722530365},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.37233293056488037},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3709806203842163},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11937013268470764},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/1935826.1935885","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1935826.1935885","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the fourth ACM international conference on Web search and data mining","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.188.3518","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.188.3518","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.mmichelson.com/paps/wsdm2011-db.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W157725869","https://openalex.org/W167355512","https://openalex.org/W179050280","https://openalex.org/W194014093","https://openalex.org/W1493490255","https://openalex.org/W1534024124","https://openalex.org/W1567365482","https://openalex.org/W1984566373","https://openalex.org/W2060565333","https://openalex.org/W2069388662","https://openalex.org/W2088762045","https://openalex.org/W2100869678","https://openalex.org/W2102474120","https://openalex.org/W2103931177","https://openalex.org/W2108489852","https://openalex.org/W2110686900","https://openalex.org/W2113490459","https://openalex.org/W2117058208","https://openalex.org/W2126539437","https://openalex.org/W2135767707","https://openalex.org/W2140116426","https://openalex.org/W2145007893","https://openalex.org/W2152749438","https://openalex.org/W6629638141"],"related_works":["https://openalex.org/W2016456293","https://openalex.org/W2161128265","https://openalex.org/W1997896902","https://openalex.org/W2138101384","https://openalex.org/W2140894225","https://openalex.org/W2125826941","https://openalex.org/W4212828571","https://openalex.org/W2888645935","https://openalex.org/W2798237655","https://openalex.org/W2408506617"],"abstract_inverted_index":{"Recently,":[0],"much":[1],"attention":[2],"has":[3,158],"been":[4],"given":[5,67],"to":[6,134,166,232],"extracting":[7,60],"tables":[8,64,100,197],"from":[9,31,198],"Web":[10,32,35,200],"data.":[11],"In":[12,52],"this":[13,53],"problem,":[14],"the":[15,44,48,57,78,98,136,140,204,214],"column":[16],"definitions":[17],"and":[18,61,87,94,118,155,184,202],"tuples":[19,227],"(such":[20],"as":[21,38,74],"what":[22,27],"\"company\"":[23],"is":[24],"headquartered":[25],"in":[26,65],"\"city,\")":[28],"are":[29,114,171],"extracted":[30,99],"text,":[33],"structured":[34],"data":[36],"such":[37],"lists,":[39],"or":[40,116],"results":[41,188],"of":[42,50,59,195,225],"querying":[43],"deep":[45],"Web,":[46],"creating":[47],"table":[49,111],"interest.":[51],"paper,":[54],"we":[55,209],"examine":[56],"problem":[58],"discovering":[62,77,120],"multiple":[63],"a":[66,70,222],"domain,":[68],"generating":[69],"truly":[71,125],"multi-relational":[72,126],"database":[73],"output.":[75,127],"Beyond":[76],"relations":[79,96,106,205],"that":[80,113,160,190,211,228],"define":[81],"single":[82],"tables,":[83],"our":[84,107,122,129,175,191,218],"approach":[85,130,176,192,219],"discovers":[86,95,203],"leverages":[88],"\"within":[89,215],"column\"":[90,216],"set":[91],"membership":[92],"relations,":[93],"across":[97],"(e.g.,":[101],"joins).":[102],"By":[103],"leveraging":[104,213],"within-column":[105],"method":[108,123],"can":[109,220],"extract":[110],"instances":[112],"ambiguous":[115],"rare,":[117],"by":[119,212],"joins,":[121],"generates":[124,193],"Further,":[128,208],"uses":[131],"taxonomic":[132,153],"queries":[133],"bootstrap":[135],"extraction,":[137],"rather":[138],"than":[139,152],"more":[141,149],"traditional":[142],"\"seed":[143],"instances.\"":[144],"Creating":[145],"seeds":[146,169],"often":[147],"requires":[148],"domain":[150],"knowledge":[151],"queries,":[154],"previous":[156],"work":[157],"shown":[159],"extraction":[161],"methods":[162],"may":[163],"be":[164,230],"sensitive":[165],"which":[167],"input":[168],"they":[170],"given.":[172],"We":[173],"test":[174],"on":[177],"two":[178],"real":[179],"world":[180],"domains:":[181],"NBA":[182],"basketball":[183],"cancer":[185],"information.":[186],"Our":[187],"demonstrate":[189],"databases":[194],"relevant":[196,226],"disparate":[199],"information,":[201],"between":[206],"them.":[207],"show":[210],"relation":[217],"identify":[221],"significant":[223],"number":[224],"would":[229],"difficult":[231],"do":[233],"so":[234],"otherwise.":[235]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
