{"id":"https://openalex.org/W2428672503","doi":"https://doi.org/10.1109/icde.2016.7498368","title":"Similarity Group-By operators for multi-dimensional relational data","display_name":"Similarity Group-By operators for multi-dimensional relational data","publication_year":2016,"publication_date":"2016-05-01","ids":{"openalex":"https://openalex.org/W2428672503","doi":"https://doi.org/10.1109/icde.2016.7498368","mag":"2428672503"},"language":"en","primary_location":{"id":"doi:10.1109/icde.2016.7498368","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icde.2016.7498368","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE 32nd International Conference on Data Engineering (ICDE)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101512458","display_name":"Mingjie Tang","orcid":"https://orcid.org/0000-0002-8893-4574"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Mingjie Tang","raw_affiliation_strings":["Purdue University"],"affiliations":[{"raw_affiliation_string":"Purdue University","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054261003","display_name":"Ruby Y. Tahboub","orcid":null},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ruby Y. Tahboub","raw_affiliation_strings":["Purdue University"],"affiliations":[{"raw_affiliation_string":"Purdue University","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000123743","display_name":"Walid G. Aref","orcid":"https://orcid.org/0000-0001-8169-7775"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Walid G. Aref","raw_affiliation_strings":["Purdue University"],"affiliations":[{"raw_affiliation_string":"Purdue University","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113562454","display_name":"Mikhail J. Atallah","orcid":null},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mikhail J. Atallah","raw_affiliation_strings":["Purdue University"],"affiliations":[{"raw_affiliation_string":"Purdue University","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028226990","display_name":"Qutaibah Malluhi","orcid":"https://orcid.org/0000-0003-2849-0569"},"institutions":[{"id":"https://openalex.org/I60342839","display_name":"Qatar University","ror":"https://ror.org/00yhnba62","country_code":"QA","type":"education","lineage":["https://openalex.org/I60342839"]}],"countries":["QA"],"is_corresponding":false,"raw_author_name":"Qutaibah M. Malluhi","raw_affiliation_strings":["Qatar University"],"affiliations":[{"raw_affiliation_string":"Qatar University","institution_ids":["https://openalex.org/I60342839"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026923999","display_name":"Mourad Ouzzani","orcid":"https://orcid.org/0000-0002-4035-3025"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mourad Ouzzani","raw_affiliation_strings":["Qatar Computing Research Institute"],"affiliations":[{"raw_affiliation_string":"Qatar Computing Research Institute","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088912009","display_name":"Yasin N. Silva","orcid":"https://orcid.org/0000-0003-1852-1683"},"institutions":[{"id":"https://openalex.org/I55732556","display_name":"Arizona State University","ror":"https://ror.org/03efmqc40","country_code":"US","type":"education","lineage":["https://openalex.org/I55732556"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yasin N. Silva","raw_affiliation_strings":["Arizona State University"],"affiliations":[{"raw_affiliation_string":"Arizona State University","institution_ids":["https://openalex.org/I55732556"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5101512458"],"corresponding_institution_ids":["https://openalex.org/I219193219"],"apc_list":null,"apc_paid":null,"fwci":1.5131,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.83327379,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1448","last_page":"1449"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10637","display_name":"Advanced Clustering Algorithms Research","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9934999942779541,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/tuple","display_name":"Tuple","score":0.882726788520813},{"id":"https://openalex.org/keywords/sql","display_name":"SQL","score":0.6327602863311768},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6009357571601868},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5602660179138184},{"id":"https://openalex.org/keywords/operator","display_name":"Operator (biology)","score":0.5438358187675476},{"id":"https://openalex.org/keywords/group","display_name":"Group (periodic table)","score":0.5368412733078003},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.49061718583106995},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.4394184350967407},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.41683998703956604},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.28201115131378174},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.2686137855052948},{"id":"https://openalex.org/keywords/discrete-mathematics","display_name":"Discrete mathematics","score":0.15716883540153503},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.1309451460838318},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.09766435623168945}],"concepts":[{"id":"https://openalex.org/C118930307","wikidata":"https://www.wikidata.org/wiki/Q600590","display_name":"Tuple","level":2,"score":0.882726788520813},{"id":"https://openalex.org/C510870499","wikidata":"https://www.wikidata.org/wiki/Q47607","display_name":"SQL","level":2,"score":0.6327602863311768},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6009357571601868},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5602660179138184},{"id":"https://openalex.org/C17020691","wikidata":"https://www.wikidata.org/wiki/Q139677","display_name":"Operator (biology)","level":5,"score":0.5438358187675476},{"id":"https://openalex.org/C2781311116","wikidata":"https://www.wikidata.org/wiki/Q83306","display_name":"Group (periodic table)","level":2,"score":0.5368412733078003},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.49061718583106995},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.4394184350967407},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.41683998703956604},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.28201115131378174},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.2686137855052948},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.15716883540153503},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.1309451460838318},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.09766435623168945},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C158448853","wikidata":"https://www.wikidata.org/wiki/Q425218","display_name":"Repressor","level":4,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C86339819","wikidata":"https://www.wikidata.org/wiki/Q407384","display_name":"Transcription factor","level":3,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icde.2016.7498368","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icde.2016.7498368","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE 32nd International Conference on Data Engineering (ICDE)","raw_type":"proceedings-article"},{"id":"pmh:oai:qspace.qu.edu.qa:10576/56757","is_oa":false,"landing_page_url":"http://hdl.handle.net/10576/56757","pdf_url":null,"source":{"id":"https://openalex.org/S4306400014","display_name":"Qatar University QSpace (Qatar University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I60342839","host_organization_name":"Qatar University","host_organization_lineage":["https://openalex.org/I60342839"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Conference"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320332753","display_name":"Qatar National Research Fund","ror":"https://ror.org/01svaqq28"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":3,"referenced_works":["https://openalex.org/W1525397625","https://openalex.org/W2428672503","https://openalex.org/W6631226029"],"related_works":["https://openalex.org/W4245395944","https://openalex.org/W2143551613","https://openalex.org/W2138823233","https://openalex.org/W1979740464","https://openalex.org/W2143345456","https://openalex.org/W1789991335","https://openalex.org/W2562731034","https://openalex.org/W4315705795","https://openalex.org/W2101053337","https://openalex.org/W3030674916"],"abstract_inverted_index":{"The":[0,19,101,124,208,230],"SQL":[1,33],"group-by":[2],"operator":[3,23,103,126],"plays":[4],"an":[5],"important":[6],"role":[7],"in":[8,14,71,80,114,149,182,255],"summarizing":[9],"and":[10,61,77,186,197,203,218,236],"aggregating":[11],"large":[12],"datasets":[13],"a":[15,115,132,136,153,172,189,237],"data":[16,37],"analytics":[17],"stack.":[18],"Similarity":[20],"SQL-based":[21],"Group-By":[22],"(SGB,":[24],"for":[25,98,192],"short)":[26],"extends":[27],"the":[28,31,81,105,112,128,139,150,157,176,180,199,219,227,243,263],"semantics":[29,167],"of":[30,160,226,252],"standard":[32,228],"Group-by":[34],"by":[35,211],"grouping":[36,48],"with":[38,170],"similar":[39],"but":[40],"not":[41,85],"necessarily":[42],"equal":[43],"values.":[44],"While":[45],"existing":[46],"similarity-based":[47],"operators":[49,97,202,213],"efficiently":[50],"realize":[51],"these":[52,212],"approximate":[53],"semantics,":[54],"they":[55],"primarily":[56],"focus":[57],"on":[58,234],"one-dimensional":[59],"attributes":[60,64],"treat":[62],"multi-dimensional":[63,82,99],"independently.":[65],"However,":[66],"correlated":[67],"attributes,":[68],"such":[69,171],"as":[70],"spatial":[72],"data,":[73,240],"are":[74,84,117,222],"processed":[75],"independently,":[76],"hence,":[78],"groups":[79],"space":[83],"detected":[86],"properly.":[87],"To":[88],"address":[89],"this":[90,193],"problem,":[91],"we":[92,163],"introduce":[93,164],"two":[94],"new":[95,190,200],"SGB":[96,201],"data.":[100],"first":[102],"is":[104,127,141],"clique":[106],"(or":[107],"distance-to-all)":[108],"SGB,":[109,130],"where":[110,131],"all":[111],"tuples":[113],"group":[116,137,191],"within":[118,142],"some":[119,143],"distance":[120,144],"from":[121,145],"each":[122],"other.":[123],"second":[125],"distance-to-any":[129],"tuple":[133,140,148,154,181],"belongs":[134],"to":[135,168,215,224,249,261],"if":[138],"any":[146,183],"other":[147],"group.":[151],"Since":[152],"may":[155],"satisfy":[156],"membership":[158],"criterion":[159],"multiple":[161],"groups,":[162],"three":[165,250],"different":[166],"deal":[169],"case:":[173],"(i)":[174],"eliminate":[175],"tuple,":[177],"(ii)":[178],"put":[179],"one":[184],"group,":[185],"(iii)":[187],"create":[188],"tuple.":[194],"We":[195],"implement":[196],"test":[198],"their":[204],"algorithms":[205,245],"inside":[206],"PostgreSQL.":[207],"overhead":[209],"introduced":[210],"proves":[214],"be":[216],"minimal":[217],"execution":[220],"times":[221],"comparable":[223],"those":[225],"Group-by.":[229],"experimental":[231],"study,":[232],"based":[233],"TPC-H":[235],"social":[238],"check-in":[239],"demonstrates":[241],"that":[242],"proposed":[244],"can":[246],"achieve":[247],"up":[248],"orders":[251],"magnitude":[253],"enhancement":[254],"performance":[256],"over":[257],"baseline":[258],"methods":[259],"developed":[260],"solve":[262],"same":[264],"problem.":[265]},"counts_by_year":[{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
