{"id":"https://openalex.org/W4413978011","doi":"https://doi.org/10.14778/3749646.3749715","title":"OmniMatch: Joinability Discovery in Data Products","display_name":"OmniMatch: Joinability Discovery in Data Products","publication_year":2025,"publication_date":"2025-07-01","ids":{"openalex":"https://openalex.org/W4413978011","doi":"https://doi.org/10.14778/3749646.3749715"},"language":"en","primary_location":{"id":"doi:10.14778/3749646.3749715","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3749646.3749715","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103144380","display_name":"Christos Koutras","orcid":"https://orcid.org/0000-0003-3015-154X"},"institutions":[{"id":"https://openalex.org/I98358874","display_name":"Delft University of Technology","ror":"https://ror.org/02e2c7k09","country_code":"NL","type":"education","lineage":["https://openalex.org/I98358874"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Christos Koutras","raw_affiliation_strings":["TU Delft"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"TU Delft","institution_ids":["https://openalex.org/I98358874"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040806743","display_name":"Jiani Zhang","orcid":"https://orcid.org/0000-0003-0074-6761"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jiani Zhang","raw_affiliation_strings":["Google"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042766429","display_name":"Xiao Qin","orcid":"https://orcid.org/0000-0002-8345-3587"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiao Qin","raw_affiliation_strings":["AWS"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"AWS","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004959524","display_name":"Chuan Lei","orcid":"https://orcid.org/0000-0001-6265-9554"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chuan Lei","raw_affiliation_strings":["AWS"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"AWS","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018583531","display_name":"Vasileios Ioannidis","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vasileios Ioannidis","raw_affiliation_strings":["AWS"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"AWS","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035605036","display_name":"Christos Faloutsos","orcid":"https://orcid.org/0000-0003-2996-9790"},"institutions":[{"id":"https://openalex.org/I4210129030","display_name":"American Metal Processing (United States)","ror":"https://ror.org/03953th04","country_code":"US","type":"company","lineage":["https://openalex.org/I4210129030"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christos Faloutsos","raw_affiliation_strings":["AWS &amp; CMU"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"AWS &amp; CMU","institution_ids":["https://openalex.org/I4210129030"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082384108","display_name":"George Karypis","orcid":"https://orcid.org/0000-0003-2753-1437"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"George Karypis","raw_affiliation_strings":["AWS"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"AWS","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5002932353","display_name":"Asterios Katsifodimos","orcid":"https://orcid.org/0000-0002-6717-2945"},"institutions":[{"id":"https://openalex.org/I98358874","display_name":"Delft University of Technology","ror":"https://ror.org/02e2c7k09","country_code":"NL","type":"education","lineage":["https://openalex.org/I98358874"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Asterios Katsifodimos","raw_affiliation_strings":["AWS &amp; TU Delft"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"AWS &amp; TU Delft","institution_ids":["https://openalex.org/I98358874"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.25011852,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"18","issue":"11","first_page":"4588","last_page":"4601"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9925000071525574,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9889000058174133,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/business","display_name":"Business","score":0.3860247731208801},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.3834349811077118},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.330885112285614}],"concepts":[{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.3860247731208801},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3834349811077118},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.330885112285614}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.14778/3749646.3749715","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3749646.3749715","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":69,"referenced_works":["https://openalex.org/W760598031","https://openalex.org/W1516019649","https://openalex.org/W1984639113","https://openalex.org/W1996505782","https://openalex.org/W2045812729","https://openalex.org/W2112129552","https://openalex.org/W2123878016","https://openalex.org/W2125980212","https://openalex.org/W2149364369","https://openalex.org/W2156543375","https://openalex.org/W2157060173","https://openalex.org/W2169387919","https://openalex.org/W2250539671","https://openalex.org/W2340222647","https://openalex.org/W2599674900","https://openalex.org/W2604314403","https://openalex.org/W2606555609","https://openalex.org/W2606791715","https://openalex.org/W2612560781","https://openalex.org/W2750779823","https://openalex.org/W2788550262","https://openalex.org/W2790634852","https://openalex.org/W2795302121","https://openalex.org/W2798664493","https://openalex.org/W2890187992","https://openalex.org/W2924309908","https://openalex.org/W2930957955","https://openalex.org/W2948163032","https://openalex.org/W2951438725","https://openalex.org/W2951621897","https://openalex.org/W2963626623","https://openalex.org/W2970204018","https://openalex.org/W2970641574","https://openalex.org/W2971681342","https://openalex.org/W3023459626","https://openalex.org/W3032215537","https://openalex.org/W3034944976","https://openalex.org/W3035231859","https://openalex.org/W3037878364","https://openalex.org/W3046744391","https://openalex.org/W3147562045","https://openalex.org/W3158303960","https://openalex.org/W3164968002","https://openalex.org/W3170190513","https://openalex.org/W3174181645","https://openalex.org/W3174637548","https://openalex.org/W3196877232","https://openalex.org/W3201053014","https://openalex.org/W4231851111","https://openalex.org/W4246662059","https://openalex.org/W4281826654","https://openalex.org/W4281845057","https://openalex.org/W4285451014","https://openalex.org/W4286447321","https://openalex.org/W4289533913","https://openalex.org/W4310390625","https://openalex.org/W4321448337","https://openalex.org/W4375928372","https://openalex.org/W4380433117","https://openalex.org/W4380433159","https://openalex.org/W4385270687","https://openalex.org/W4385653220","https://openalex.org/W4385893866","https://openalex.org/W4389523882","https://openalex.org/W4400909768","https://openalex.org/W4401042475","https://openalex.org/W4401043101","https://openalex.org/W4401856724","https://openalex.org/W4401857408"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"We":[0],"propose":[1],"OmniMatch":[2,24,36,59,84],",":[3],"a":[4],"novel":[5],"joinability":[6,52],"discovery":[7,53],"technique,":[8],"specifically":[9],"tailored":[10],"for":[11,102],"the":[12,49,56,82],"needs":[13],"of":[14,21,51],"data":[15,67],"products":[16],":":[17],"cohesive":[18],"curated":[19],"collections":[20],"tabular":[22],"datasets.":[23],"combines":[25],"multiple":[26],"column-pair":[27],"similarity":[28,104],"measures":[29],"leveraging":[30,43],"self-supervised":[31],"Graph":[32],"Neural":[33],"Networks":[34],"(GNNs).":[35],"'s":[37],"GNN":[38],"captures":[39],"column":[40,70],"relatedness":[41],"by":[42,63],"graph":[44],"neighborhood":[45],"information,":[46],"significantly":[47],"improving":[48],"recall":[50],"tasks.":[54],"At":[55],"same":[57],"time,":[58],"increases":[60],"its":[61,65],"precision":[62],"augmenting":[64],"training":[66],"with":[68],"negative":[69,76],"join":[71],"examples":[72],"through":[73],"an":[74],"automated":[75],"example":[77],"generation":[78],"process.":[79],"Compared":[80],"to":[81,87],"state-of-the-art,":[83],"exhibits":[85],"up":[86],"14%":[88],"higher":[89],"effectiveness":[90],"in":[91],"F1":[92],"score":[93],"and":[94],"AUC":[95],"without":[96],"relying":[97],"on":[98],"individual,":[99],"user-provided":[100],"thresholds":[101],"each":[103],"metric.":[105]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
