{"id":"https://openalex.org/W2616147950","doi":"https://doi.org/10.14778/3115404.3115409","title":"Auto-join","display_name":"Auto-join","publication_year":2017,"publication_date":"2017-06-01","ids":{"openalex":"https://openalex.org/W2616147950","doi":"https://doi.org/10.14778/3115404.3115409","mag":"2616147950"},"language":"en","primary_location":{"id":"doi:10.14778/3115404.3115409","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3115404.3115409","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5013934423","display_name":"Erkang Zhu","orcid":"https://orcid.org/0009-0000-3326-1790"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Erkang Zhu","raw_affiliation_strings":["University of Toronto"],"affiliations":[{"raw_affiliation_string":"University of Toronto","institution_ids":["https://openalex.org/I185261750"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034908019","display_name":"Yeye He","orcid":"https://orcid.org/0000-0003-2824-5299"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Yeye He","raw_affiliation_strings":["Microsoft Research"],"affiliations":[{"raw_affiliation_string":"Microsoft Research","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038037154","display_name":"Surajit Chaudhuri","orcid":"https://orcid.org/0000-0001-8252-5270"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Surajit Chaudhuri","raw_affiliation_strings":["Microsoft Research"],"affiliations":[{"raw_affiliation_string":"Microsoft Research","institution_ids":["https://openalex.org/I4210164937"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5013934423"],"corresponding_institution_ids":["https://openalex.org/I185261750"],"apc_list":null,"apc_paid":null,"fwci":5.4247,"has_fulltext":false,"cited_by_count":63,"citation_normalized_percentile":{"value":0.95737093,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"10","issue":"10","first_page":"1034","last_page":"1045"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9944000244140625,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/joins","display_name":"Joins","score":0.938206136226654},{"id":"https://openalex.org/keywords/join","display_name":"Join (topology)","score":0.8840682506561279},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.800643265247345},{"id":"https://openalex.org/keywords/transformation","display_name":"Transformation (genetics)","score":0.46837860345840454},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.46517205238342285},{"id":"https://openalex.org/keywords/string","display_name":"String (physics)","score":0.4541960060596466},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.3642226457595825},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3605504035949707},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3407818675041199},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.2349012792110443},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.0930391252040863}],"concepts":[{"id":"https://openalex.org/C2778692605","wikidata":"https://www.wikidata.org/wiki/Q4041866","display_name":"Joins","level":2,"score":0.938206136226654},{"id":"https://openalex.org/C2776124973","wikidata":"https://www.wikidata.org/wiki/Q3183033","display_name":"Join (topology)","level":2,"score":0.8840682506561279},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.800643265247345},{"id":"https://openalex.org/C204241405","wikidata":"https://www.wikidata.org/wiki/Q461499","display_name":"Transformation (genetics)","level":3,"score":0.46837860345840454},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.46517205238342285},{"id":"https://openalex.org/C157486923","wikidata":"https://www.wikidata.org/wiki/Q1376436","display_name":"String (physics)","level":2,"score":0.4541960060596466},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3642226457595825},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3605504035949707},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3407818675041199},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2349012792110443},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0930391252040863},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C37914503","wikidata":"https://www.wikidata.org/wiki/Q156495","display_name":"Mathematical physics","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.14778/3115404.3115409","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3115404.3115409","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W171450560","https://openalex.org/W643027706","https://openalex.org/W1502767928","https://openalex.org/W1515532631","https://openalex.org/W1972495172","https://openalex.org/W1981578383","https://openalex.org/W1984566373","https://openalex.org/W2021739502","https://openalex.org/W2027939060","https://openalex.org/W2054658115","https://openalex.org/W2110686900","https://openalex.org/W2121516976","https://openalex.org/W2135767707","https://openalex.org/W2152565070","https://openalex.org/W2158874082","https://openalex.org/W2164611950","https://openalex.org/W2260484439","https://openalex.org/W2295310002","https://openalex.org/W2496170334","https://openalex.org/W2504389667","https://openalex.org/W2612824201","https://openalex.org/W4233189519"],"related_works":["https://openalex.org/W2488852078","https://openalex.org/W2393491644","https://openalex.org/W4206577045","https://openalex.org/W3086237447","https://openalex.org/W2589740103","https://openalex.org/W1966967794","https://openalex.org/W1496672428","https://openalex.org/W650102067","https://openalex.org/W2172084996","https://openalex.org/W2397450670"],"abstract_inverted_index":{"Traditional":[0],"equi-join":[1,46],"relies":[2],"solely":[3],"on":[4],"string":[5],"equality":[6],"comparisons":[7],"to":[8,24,65,85,87],"perform":[9],"joins.":[10],"However,":[11],"in":[12,19],"scenarios":[13],"such":[14],"as":[15],"ad-hoc":[16],"data":[17],"analysis":[18],"spreadsheets,":[20],"users":[21],"increasingly":[22],"need":[23],"join":[25],"tables":[26,74,109,113],"whose":[27,70],"join-columns":[28],"are":[29,43],"from":[30,105],"the":[31,116,120],"same":[32],"semantic":[33],"domain":[34],"but":[35],"use":[36],"different":[37],"textual":[38],"representations,":[39],"for":[40],"which":[41],"transformations":[42],"needed":[44],"before":[45],"can":[47,56],"be":[48],"performed.":[49],"We":[50,76],"developed":[51,77],"Auto-Join,":[52],"a":[53,60,67],"system":[54,118],"that":[55,82,115],"automatically":[57],"search":[58],"over":[59],"rich":[61],"space":[62],"of":[63],"operators":[64],"compose":[66],"transformation":[68,122],"program,":[69],"execution":[71],"makes":[72],"input":[73],"equi-join-able.":[75],"an":[78],"optimal":[79],"sampling":[80],"strategy":[81],"allows":[83],"Auto-Join":[84],"scale":[86],"large":[88],"datasets":[89],"efficiently,":[90],"while":[91],"ensuring":[92],"joins":[93,123],"succeed":[94],"with":[95,126],"high":[96,127],"probability.":[97],"Our":[98],"evaluation":[99],"using":[100],"real":[101],"test":[102],"cases":[103],"collected":[104],"both":[106],"public":[107],"web":[108],"and":[110,125],"proprietary":[111],"enterprise":[112],"shows":[114],"proposed":[117],"performs":[119],"desired":[121],"efficiently":[124],"quality.":[128]},"counts_by_year":[{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":12},{"year":2022,"cited_by_count":7},{"year":2021,"cited_by_count":9},{"year":2020,"cited_by_count":10},{"year":2019,"cited_by_count":5},{"year":2018,"cited_by_count":6}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2017-05-26T00:00:00"}
