{"id":"https://openalex.org/W4393606470","doi":"https://doi.org/10.5281/zenodo.7758091","title":"SANTOS Benchmark for Table Union Search","display_name":"SANTOS Benchmark for Table Union Search","publication_year":2023,"publication_date":"2023-03-22","ids":{"openalex":"https://openalex.org/W4393606470","doi":"https://doi.org/10.5281/zenodo.7758091"},"language":"en","primary_location":{"id":"doi:10.5281/zenodo.7758091","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.7758091","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"dataset"},"type":"dataset","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.5281/zenodo.7758091","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047161025","display_name":"Aamod Khatiwada","orcid":"https://orcid.org/0000-0001-5720-1207"},"institutions":[{"id":"https://openalex.org/I87182695","display_name":"Universidad del Noreste","ror":"https://ror.org/02ahky613","country_code":"MX","type":"education","lineage":["https://openalex.org/I87182695"]}],"countries":["MX"],"is_corresponding":true,"raw_author_name":"Khatiwada, Aamod","raw_affiliation_strings":["Northeastern University"],"affiliations":[{"raw_affiliation_string":"Northeastern University","institution_ids":["https://openalex.org/I87182695"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049651913","display_name":"Grace Fan","orcid":"https://orcid.org/0000-0001-9020-3642"},"institutions":[{"id":"https://openalex.org/I87182695","display_name":"Universidad del Noreste","ror":"https://ror.org/02ahky613","country_code":"MX","type":"education","lineage":["https://openalex.org/I87182695"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Fan, Grace","raw_affiliation_strings":["Northeastern University"],"affiliations":[{"raw_affiliation_string":"Northeastern University","institution_ids":["https://openalex.org/I87182695"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002899918","display_name":"Roee Shraga","orcid":"https://orcid.org/0000-0001-8803-8481"},"institutions":[{"id":"https://openalex.org/I87182695","display_name":"Universidad del Noreste","ror":"https://ror.org/02ahky613","country_code":"MX","type":"education","lineage":["https://openalex.org/I87182695"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Shraga, Roee","raw_affiliation_strings":["Northeastern University"],"affiliations":[{"raw_affiliation_string":"Northeastern University","institution_ids":["https://openalex.org/I87182695"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103115509","display_name":"Zixuan Chen","orcid":"https://orcid.org/0000-0003-2872-1865"},"institutions":[{"id":"https://openalex.org/I87182695","display_name":"Universidad del Noreste","ror":"https://ror.org/02ahky613","country_code":"MX","type":"education","lineage":["https://openalex.org/I87182695"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Chen, Zixuan","raw_affiliation_strings":["Northeastern University"],"affiliations":[{"raw_affiliation_string":"Northeastern University","institution_ids":["https://openalex.org/I87182695"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086781628","display_name":"Wolfgang Gatterbauer","orcid":"https://orcid.org/0000-0002-9614-0504"},"institutions":[{"id":"https://openalex.org/I87182695","display_name":"Universidad del Noreste","ror":"https://ror.org/02ahky613","country_code":"MX","type":"education","lineage":["https://openalex.org/I87182695"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Gatterbauer, Wolfgang","raw_affiliation_strings":["Northeastern University"],"affiliations":[{"raw_affiliation_string":"Northeastern University","institution_ids":["https://openalex.org/I87182695"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022619313","display_name":"Ren\u00e9e J. Miller","orcid":"https://orcid.org/0000-0002-1484-4787"},"institutions":[{"id":"https://openalex.org/I87182695","display_name":"Universidad del Noreste","ror":"https://ror.org/02ahky613","country_code":"MX","type":"education","lineage":["https://openalex.org/I87182695"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Miller, Ren\u00e9e J.","raw_affiliation_strings":["Northeastern University"],"affiliations":[{"raw_affiliation_string":"Northeastern University","institution_ids":["https://openalex.org/I87182695"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049802784","display_name":"Mirek Riedewald","orcid":"https://orcid.org/0000-0002-6102-7472"},"institutions":[{"id":"https://openalex.org/I87182695","display_name":"Universidad del Noreste","ror":"https://ror.org/02ahky613","country_code":"MX","type":"education","lineage":["https://openalex.org/I87182695"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Riedewald, Mirek","raw_affiliation_strings":["Northeastern University"],"affiliations":[{"raw_affiliation_string":"Northeastern University","institution_ids":["https://openalex.org/I87182695"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5047161025"],"corresponding_institution_ids":["https://openalex.org/I87182695"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.2515999972820282,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.2515999972820282,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7209599018096924},{"id":"https://openalex.org/keywords/table","display_name":"Table (database)","score":0.696068286895752},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.42932915687561035},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.2695366144180298},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.25916391611099243},{"id":"https://openalex.org/keywords/cartography","display_name":"Cartography","score":0.21441000699996948}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7209599018096924},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.696068286895752},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.42932915687561035},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.2695366144180298},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.25916391611099243},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.21441000699996948}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.5281/zenodo.7758091","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.7758091","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"dataset"}],"best_oa_location":{"id":"doi:10.5281/zenodo.7758091","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.7758091","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"dataset"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2378211422","https://openalex.org/W2745001401","https://openalex.org/W4321353415","https://openalex.org/W2130974462","https://openalex.org/W2028665553","https://openalex.org/W2086519370","https://openalex.org/W972276598","https://openalex.org/W2087343574","https://openalex.org/W4246352526"],"abstract_inverted_index":{"This":[0,372],"record":[1],"contains":[2],"the":[3,25,31,48,52,165,185,195,204,212,226,246,278,289,354],"datasets":[4],"released":[5],"with":[6,245,387],"SIGMOD":[7,98],"2023":[8],"paper":[9,59,74],"entitled":[10],"\"SANTOS:":[11],"Relationship-based":[12,93,129],"Semantic":[13,94,130],"Table":[14,95,131,297],"Union":[15,96,132,298],"Search\".":[16],"We":[17,293,314,343],"release":[18,37],"two":[19,259],"new":[20,235,260,303,318],"tabular":[21],"benchmarks":[22,305,348],"to":[23,224,262],"evaluate":[24],"table":[26,176,199,223],"union":[27,229,327,357,395],"search":[28,177,320,328,358],"problem":[29],"over":[30],"data":[32,290,312,385],"lakes.":[33,313],"Furthermore,":[34],"we":[35,210,232,257,281],"also":[36],"relabeled":[38],"ground":[39],"truth":[40],"for":[41,60,68,174,394],"an":[42,273,295,369],"existing":[43,274,296],"TUS":[44,167],"benchmark":[45,168,300],"by":[46,359],"taking":[47],"binary":[49],"relationship":[50,265,361],"between":[51,217,242,266],"columns":[53,220],"into":[54],"account.":[55],"Please":[56],"visit":[57],"our":[58,66,73,317,350],"further":[61],"details.":[62],"If":[63],"you":[64],"use":[65,213],"dataset":[67],"your":[69],"work,":[70,209],"please":[71],"cite":[72],"as:":[75],"Aamod":[76],"Khatiwada,":[77],"Grace":[78,108],"Fan,":[79],"Roee":[80,111],"Shraga,":[81],"Zixuan":[82,114],"Chen,":[83],"Wolfgang":[84,117],"Gatterbauer,":[85],"Ren\u00e9e":[86],"J.":[87,121],"Miller,":[88],"and":[89,107,110,113,116,119,123,301,309,340,391],"MirekRiedewald.":[90],"2023.":[91],"SANTOS:":[92],"Search.":[97],"Conference":[99],"2023,":[100],"ACM":[101],"@article{DBLP:journals/pacmmod/KhatiwadaFSCGMR23,":[102],"author":[103],"=":[104,127,135,141,144,147,150,153],"{Aamod":[105],"Khatiwada":[106],"Fan":[109],"Shraga":[112],"Chen":[115],"Gatterbauer":[118],"Ren{\\'{e}}e":[120],"Miller":[122],"Mirek":[124],"Riedewald},":[125],"title":[126],"{{SANTOS:}":[128],"Search},":[133],"journal":[134],"{Proc.":[136],"{ACM}":[137],"Manag.":[138],"Data},":[139],"volume":[140],"{1},":[142,145],"number":[143],"pages":[146],"{9:1--9:25},":[148],"year":[149],"{2023},":[151],"doi":[152],"{10.1145/3588689},}":[154],"You":[155,162],"can":[156,163],"find":[157,164],"SANTOS":[158,323],"implementation":[159],"at:":[160,169],"https://github.com/northeastern-datalab/santos":[161],"original":[166],"https://github.com/RJMillerLab/table-union-search-benchmark":[170],"Abstract:":[171],"Existing":[172],"techniques":[173],"unionable":[175],"define":[178],"unionability":[179,238,319],"using":[180,392],"metadata":[181],"(tables":[182],"must":[183],"have":[184],"same":[186,205],"or":[187,190],"similar":[188],"schemas)":[189],"column-based":[191,335],"metrics":[192],"(for":[193],"example,":[194],"values":[196],"in":[197,221,250,346,368],"a":[198,222,234,251,283,325,331,376,381],"should":[200],"be":[201,366],"drawn":[202],"from":[203,288,384],"domain).":[206],"In":[207],"this":[208],"introduce":[211,233],"of":[214,219,228,237,248,268,334,356,379],"semantic":[215,264],"relationships":[216,241],"pairs":[218,267],"improve":[225],"accuracy":[227,355],"search.":[230,396],"Consequently,":[231],"notion":[236],"that":[239,306,316,329,349,363],"considers":[240],"columns,":[243,249],"together":[244],"semantics":[247,362],"principled":[252],"way.":[253],"To":[254],"do":[255],"so,":[256],"present":[258,302],"methods":[261],"discover":[263],"columns:":[269],"The":[270],"first":[271],"uses":[272,286,330],"knowledge":[275,287],"base":[276],"(KB),":[277],"second":[279],"(which":[280],"call":[282],"\u201csynthesized":[284],"KB\u201d)":[285],"lake":[291],"itself.":[292],"adopt":[294],"Search":[299],"(open)":[304],"represent":[307],"small":[308],"large":[310],"real":[311],"show":[315,344],"algorithm":[321],"called":[322],"outperforms":[324],"state-of-the-art":[326],"wide":[332],"variety":[333],"semantics,":[336],"including":[337],"word":[338],"embeddings":[339],"regular":[341],"expressions.":[342],"empirically":[345],"all":[347],"synthesized":[351,382],"KB":[352,389],"improves":[353],"representing":[360],"may":[364],"not":[365],"contained":[367],"available":[370],"KB.":[371],"result":[373],"hints":[374],"at":[375],"promising":[377],"future":[378],"creating":[380],"KBs":[383],"lakes":[386],"limited":[388],"coverage":[390],"them":[393]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
