{"id":"https://openalex.org/W2798664493","doi":"https://doi.org/10.14778/3192965.3192973","title":"Table union search on open data","display_name":"Table union search on open data","publication_year":2018,"publication_date":"2018-03-01","ids":{"openalex":"https://openalex.org/W2798664493","doi":"https://doi.org/10.14778/3192965.3192973","mag":"2798664493"},"language":"en","primary_location":{"id":"doi:10.14778/3192965.3192973","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3192965.3192973","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5012572863","display_name":"Fatemeh Nargesian","orcid":"https://orcid.org/0000-0002-4710-8719"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Fatemeh Nargesian","raw_affiliation_strings":["University of Toronto"],"affiliations":[{"raw_affiliation_string":"University of Toronto","institution_ids":["https://openalex.org/I185261750"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013934423","display_name":"Erkang Zhu","orcid":"https://orcid.org/0009-0000-3326-1790"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Erkang Zhu","raw_affiliation_strings":["University of Toronto"],"affiliations":[{"raw_affiliation_string":"University of Toronto","institution_ids":["https://openalex.org/I185261750"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108778575","display_name":"Ken Q. Pu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210119257","display_name":"University of Information Technology and Communications","ror":"https://ror.org/028h0pd91","country_code":"IQ","type":"education","lineage":["https://openalex.org/I4210119257"]}],"countries":["IQ"],"is_corresponding":false,"raw_author_name":"Ken Q. Pu","raw_affiliation_strings":["UOIT"],"affiliations":[{"raw_affiliation_string":"UOIT","institution_ids":["https://openalex.org/I4210119257"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5022619313","display_name":"Ren\u00e9e J. Miller","orcid":"https://orcid.org/0000-0002-1484-4787"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Ren\u00e9e J. Miller","raw_affiliation_strings":["University of Toronto"],"affiliations":[{"raw_affiliation_string":"University of Toronto","institution_ids":["https://openalex.org/I185261750"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5012572863"],"corresponding_institution_ids":["https://openalex.org/I185261750"],"apc_list":null,"apc_paid":null,"fwci":15.2456,"has_fulltext":false,"cited_by_count":189,"citation_normalized_percentile":{"value":0.99197134,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":"11","issue":"7","first_page":"813","last_page":"825"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/table","display_name":"Table (database)","score":0.8406137228012085},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7916719913482666},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6350670456886292},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5529093742370605},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5138030648231506},{"id":"https://openalex.org/keywords/semantic-search","display_name":"Semantic search","score":0.5058955550193787},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.45899680256843567},{"id":"https://openalex.org/keywords/ontology","display_name":"Ontology","score":0.4406460225582123},{"id":"https://openalex.org/keywords/decision-table","display_name":"Decision table","score":0.4294635057449341},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.41200584173202515},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4107057452201843},{"id":"https://openalex.org/keywords/search-engine","display_name":"Search engine","score":0.2197275161743164},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.1998806893825531},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11696469783782959},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.08502691984176636}],"concepts":[{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.8406137228012085},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7916719913482666},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6350670456886292},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5529093742370605},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5138030648231506},{"id":"https://openalex.org/C166423231","wikidata":"https://www.wikidata.org/wiki/Q1891170","display_name":"Semantic search","level":3,"score":0.5058955550193787},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.45899680256843567},{"id":"https://openalex.org/C25810664","wikidata":"https://www.wikidata.org/wiki/Q44325","display_name":"Ontology","level":2,"score":0.4406460225582123},{"id":"https://openalex.org/C172967692","wikidata":"https://www.wikidata.org/wiki/Q747762","display_name":"Decision table","level":3,"score":0.4294635057449341},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.41200584173202515},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4107057452201843},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.2197275161743164},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.1998806893825531},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11696469783782959},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.08502691984176636},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111012933","wikidata":"https://www.wikidata.org/wiki/Q3137210","display_name":"Rough set","level":2,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.14778/3192965.3192973","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3192965.3192973","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W128746142","https://openalex.org/W1423339008","https://openalex.org/W1532325895","https://openalex.org/W1552847225","https://openalex.org/W1588213250","https://openalex.org/W1736726159","https://openalex.org/W1875032969","https://openalex.org/W1969621019","https://openalex.org/W1981578383","https://openalex.org/W1996505782","https://openalex.org/W2012833704","https://openalex.org/W2020022499","https://openalex.org/W2022166150","https://openalex.org/W2042389627","https://openalex.org/W2064771454","https://openalex.org/W2066806792","https://openalex.org/W2082179583","https://openalex.org/W2092364718","https://openalex.org/W2099908986","https://openalex.org/W2100365109","https://openalex.org/W2106099982","https://openalex.org/W2108223890","https://openalex.org/W2108489852","https://openalex.org/W2111869785","https://openalex.org/W2132069633","https://openalex.org/W2140116426","https://openalex.org/W2148781362","https://openalex.org/W2153252192","https://openalex.org/W2153579005","https://openalex.org/W2158899491","https://openalex.org/W2187089797","https://openalex.org/W2400256190","https://openalex.org/W2510769428","https://openalex.org/W2537515450","https://openalex.org/W2752618741","https://openalex.org/W2950133940","https://openalex.org/W2963174348","https://openalex.org/W2963626623","https://openalex.org/W2997617958","https://openalex.org/W2998704965","https://openalex.org/W4230065791","https://openalex.org/W4285719527","https://openalex.org/W4298042201","https://openalex.org/W6679663036","https://openalex.org/W6712839960","https://openalex.org/W7002050966"],"related_works":["https://openalex.org/W2378211422","https://openalex.org/W2745001401","https://openalex.org/W4321353415","https://openalex.org/W2130974462","https://openalex.org/W972276598","https://openalex.org/W4246352526","https://openalex.org/W2028665553","https://openalex.org/W4230315250","https://openalex.org/W2086519370","https://openalex.org/W2087343574"],"abstract_inverted_index":{"We":[0,64,116],"define":[1],"the":[2,34,72,91],"table":[3,21,120],"union":[4,121],"search":[5,122,139],"problem":[6],"and":[7,60,108,126,134],"present":[8],"a":[9,19,66,83,110],"probabilistic":[10],"solution":[11,38],"for":[12,77,130],"finding":[13,131],"tables":[14,26,98,133],"that":[15,43,69,99,118],"are":[16,27,48,87],"unionable":[17,28,46],"with":[18,55],"query":[20],"within":[22],"massive":[23],"repositories.":[24],"Two":[25],"if":[29],"they":[30],"share":[31],"attributes":[32,47,95],"from":[33,50,57],"same":[35],"domain.":[36],"Our":[37],"formalizes":[39],"three":[40],"statistical":[41],"models":[42],"describe":[44],"how":[45],"generated":[49],"set":[51],"domains,":[52],"semantic":[53],"domains":[54],"values":[56],"an":[58],"ontology,":[59],"natural":[61],"language":[62],"domains.":[63],"propose":[65],"data-driven":[67],"approach":[68],"automatically":[70],"determines":[71],"best":[73],"model":[74],"to":[75,89,136],"use":[76],"each":[78],"pair":[79],"of":[80,94,112],"attributes.":[81,149],"Through":[82],"distribution-aware":[84],"algorithm,":[85],"we":[86,106],"able":[88],"find":[90],"optimal":[92],"number":[93],"in":[96,124],"two":[97],"can":[100],"be":[101],"unioned.":[102],"To":[103],"evaluate":[104],"accuracy,":[105],"created":[107],"open-sourced":[109],"benchmark":[111],"Open":[113,141],"Data":[114,142],"tables.":[115],"show":[117],"our":[119],"outperforms":[123],"speed":[125],"accuracy":[127],"existing":[128],"algorithms":[129],"related":[132],"scales":[135],"provide":[137],"efficient":[138],"over":[140],"repositories":[143],"containing":[144],"more":[145],"than":[146],"one":[147],"million":[148]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":34},{"year":2024,"cited_by_count":32},{"year":2023,"cited_by_count":30},{"year":2022,"cited_by_count":21},{"year":2021,"cited_by_count":32},{"year":2020,"cited_by_count":22},{"year":2019,"cited_by_count":12},{"year":2018,"cited_by_count":5}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
