{"id":"https://openalex.org/W2626925008","doi":"https://doi.org/10.1145/3106426.3106484","title":"Preference-driven similarity join","display_name":"Preference-driven similarity join","publication_year":2017,"publication_date":"2017-08-10","ids":{"openalex":"https://openalex.org/W2626925008","doi":"https://doi.org/10.1145/3106426.3106484","mag":"2626925008"},"language":"en","primary_location":{"id":"doi:10.1145/3106426.3106484","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3106426.3106484","pdf_url":null,"source":{"id":"https://openalex.org/S4306524158","display_name":"Proceedings of the International Conference on Web Intelligence","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference on Web Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1706.04266","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Chuancong Gao","orcid":null},"institutions":[{"id":"https://openalex.org/I18014758","display_name":"Simon Fraser University","ror":"https://ror.org/0213rcc28","country_code":"CA","type":"education","lineage":["https://openalex.org/I18014758"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Chuancong Gao","raw_affiliation_strings":["Simon Fraser University"],"affiliations":[{"raw_affiliation_string":"Simon Fraser University","institution_ids":["https://openalex.org/I18014758"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jiannan Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I18014758","display_name":"Simon Fraser University","ror":"https://ror.org/0213rcc28","country_code":"CA","type":"education","lineage":["https://openalex.org/I18014758"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Jiannan Wang","raw_affiliation_strings":["Simon Fraser University"],"affiliations":[{"raw_affiliation_string":"Simon Fraser University","institution_ids":["https://openalex.org/I18014758"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jian Pei","orcid":null},"institutions":[{"id":"https://openalex.org/I18014758","display_name":"Simon Fraser University","ror":"https://ror.org/0213rcc28","country_code":"CA","type":"education","lineage":["https://openalex.org/I18014758"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Jian Pei","raw_affiliation_strings":["Simon Fraser University"],"affiliations":[{"raw_affiliation_string":"Simon Fraser University","institution_ids":["https://openalex.org/I18014758"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Rui Li","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rui Li","raw_affiliation_strings":["Google Inc"],"affiliations":[{"raw_affiliation_string":"Google Inc","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":null,"display_name":"Yi Chang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yi Chang","raw_affiliation_strings":["Huawei Research America"],"affiliations":[{"raw_affiliation_string":"Huawei Research America","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I18014758"],"apc_list":null,"apc_paid":null,"fwci":0.3284,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.46840959,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"97","last_page":"105"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.48590001463890076,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.48590001463890076,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.1543000042438507,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.06800000369548798,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.8019000291824341},{"id":"https://openalex.org/keywords/join","display_name":"Join (topology)","score":0.667900025844574},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5839999914169312},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5264999866485596},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.46709999442100525},{"id":"https://openalex.org/keywords/similitude","display_name":"Similitude","score":0.3653999865055084}],"concepts":[{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.8019000291824341},{"id":"https://openalex.org/C2776124973","wikidata":"https://www.wikidata.org/wiki/Q3183033","display_name":"Join (topology)","level":2,"score":0.667900025844574},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5839999914169312},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5264999866485596},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5015000104904175},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.46709999442100525},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3928999900817871},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.36550000309944153},{"id":"https://openalex.org/C143271835","wikidata":"https://www.wikidata.org/wiki/Q254515","display_name":"Similitude","level":2,"score":0.3653999865055084},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.3375999927520752},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33629998564720154},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.31839999556541443},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3176000118255615},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.26109999418258667},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.2590999901294708}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3106426.3106484","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3106426.3106484","pdf_url":null,"source":{"id":"https://openalex.org/S4306524158","display_name":"Proceedings of the International Conference on Web Intelligence","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference on Web Intelligence","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1706.04266","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1706.04266","pdf_url":"https://arxiv.org/pdf/1706.04266","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1706.04266","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1706.04266","pdf_url":"https://arxiv.org/pdf/1706.04266","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W646176396","https://openalex.org/W1973001156","https://openalex.org/W1979666709","https://openalex.org/W2037562342","https://openalex.org/W2044163187","https://openalex.org/W2059975159","https://openalex.org/W2097184821","https://openalex.org/W2097776316","https://openalex.org/W2104599107","https://openalex.org/W2105436061","https://openalex.org/W2107966677","https://openalex.org/W2114764731","https://openalex.org/W2115214414","https://openalex.org/W2119320829","https://openalex.org/W2121269638","https://openalex.org/W2121516976","https://openalex.org/W2150916025","https://openalex.org/W2151930506","https://openalex.org/W2166400748","https://openalex.org/W2168976073","https://openalex.org/W2296063924","https://openalex.org/W2770217102","https://openalex.org/W6667640943"],"related_works":[],"abstract_inverted_index":{"Similarity":[0],"join,":[1,28],"which":[2,29],"can":[3],"find":[4],"similar":[5],"objects":[6],"(e.g.,":[7],"products,":[8],"names,":[9],"addresses)":[10],"across":[11],"different":[12,86],"sources,":[13],"is":[14,41,102],"powerful":[15],"in":[16,20,34],"dealing":[17],"with":[18],"variety":[19],"big":[21],"data,":[22],"especially":[23],"web":[24],"data.":[25],"Threshold-driven":[26],"similarity":[27,46,73,87,100],"has":[30],"been":[31],"extensively":[32],"studied":[33],"the":[35,56,62,67,95],"past,":[36],"assumes":[37],"that":[38,66],"a":[39,45,70,92,108],"user":[40],"able":[42],"to":[43,53,104],"specify":[44],"threshold,":[47],"and":[48],"then":[49],"focuses":[50],"on":[51,99],"how":[52],"efficiently":[54],"return":[55],"object":[57],"pairs":[58],"whose":[59],"similarities":[60],"pass":[61],"threshold.":[63],"We":[64],"argue":[65],"assumption":[68],"about":[69],"well":[71],"set":[72],"threshold":[74],"may":[75,90],"not":[76],"be":[77,105],"valid":[78],"for":[79,85],"two":[80],"reasons.":[81],"The":[82],"optimal":[83],"thresholds":[84],"join":[88,101],"tasks":[89],"vary":[91],"lot.":[93],"Moreover,":[94],"end-to-end":[96],"time":[97],"spent":[98],"likely":[103],"dominated":[106],"by":[107],"back-and-forth":[109],"threshold-tuning":[110],"process.":[111]},"counts_by_year":[{"year":2017,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2017-06-23T00:00:00"}
