{"id":"https://openalex.org/W4308883303","doi":"https://doi.org/10.1145/3568027","title":"Efficient Sorting, Duplicate Removal, Grouping, and Aggregation","display_name":"Efficient Sorting, Duplicate Removal, Grouping, and Aggregation","publication_year":2022,"publication_date":"2022-11-10","ids":{"openalex":"https://openalex.org/W4308883303","doi":"https://doi.org/10.1145/3568027"},"language":"en","primary_location":{"id":"doi:10.1145/3568027","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3568027","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3568027","source":{"id":"https://openalex.org/S90119964","display_name":"ACM Transactions on Database Systems","issn_l":"0362-5915","issn":["0362-5915","1557-4644"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Database Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3568027","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101511221","display_name":"Thanh Do","orcid":"https://orcid.org/0000-0001-9893-5725"},"institutions":[{"id":"https://openalex.org/I4210098601","display_name":"Celsion (United States)","ror":"https://ror.org/00tzssk35","country_code":"US","type":"company","lineage":["https://openalex.org/I4210098601"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Thanh Do","raw_affiliation_strings":["Celonis Inc., New York, NY, USA"],"affiliations":[{"raw_affiliation_string":"Celonis Inc., New York, NY, USA","institution_ids":["https://openalex.org/I4210098601"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041862671","display_name":"Goetz Graefe","orcid":"https://orcid.org/0000-0003-0194-6466"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Goetz Graefe","raw_affiliation_strings":["Google Inc., Madison, WI, USA"],"affiliations":[{"raw_affiliation_string":"Google Inc., Madison, WI, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017678555","display_name":"Jeffrey F. Naughton","orcid":"https://orcid.org/0000-0002-3710-8096"},"institutions":[{"id":"https://openalex.org/I4210098601","display_name":"Celsion (United States)","ror":"https://ror.org/00tzssk35","country_code":"US","type":"company","lineage":["https://openalex.org/I4210098601"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jeffrey Naughton","raw_affiliation_strings":["Celonis Inc., New York, NY, USA"],"affiliations":[{"raw_affiliation_string":"Celonis Inc., New York, NY, USA","institution_ids":["https://openalex.org/I4210098601"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101511221"],"corresponding_institution_ids":["https://openalex.org/I4210098601"],"apc_list":null,"apc_paid":null,"fwci":2.5579,"has_fulltext":true,"cited_by_count":18,"citation_normalized_percentile":{"value":0.89908083,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"47","issue":"4","first_page":"1","last_page":"35"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8808745741844177},{"id":"https://openalex.org/keywords/hash-function","display_name":"Hash function","score":0.6664454936981201},{"id":"https://openalex.org/keywords/sort","display_name":"sort","score":0.626992404460907},{"id":"https://openalex.org/keywords/sorting-algorithm","display_name":"Sorting algorithm","score":0.5976386070251465},{"id":"https://openalex.org/keywords/sorting","display_name":"Sorting","score":0.5845150947570801},{"id":"https://openalex.org/keywords/merge","display_name":"Merge (version control)","score":0.5433358550071716},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.5270987749099731},{"id":"https://openalex.org/keywords/merge-algorithm","display_name":"Merge algorithm","score":0.5002062320709229},{"id":"https://openalex.org/keywords/hash-table","display_name":"Hash table","score":0.47171658277511597},{"id":"https://openalex.org/keywords/query-optimization","display_name":"Query optimization","score":0.43132826685905457},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3446025848388672},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.2222103774547577},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.1978238821029663}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8808745741844177},{"id":"https://openalex.org/C99138194","wikidata":"https://www.wikidata.org/wiki/Q183427","display_name":"Hash function","level":2,"score":0.6664454936981201},{"id":"https://openalex.org/C88548561","wikidata":"https://www.wikidata.org/wiki/Q347599","display_name":"sort","level":2,"score":0.626992404460907},{"id":"https://openalex.org/C108094655","wikidata":"https://www.wikidata.org/wiki/Q181593","display_name":"Sorting algorithm","level":3,"score":0.5976386070251465},{"id":"https://openalex.org/C111696304","wikidata":"https://www.wikidata.org/wiki/Q2303697","display_name":"Sorting","level":2,"score":0.5845150947570801},{"id":"https://openalex.org/C197129107","wikidata":"https://www.wikidata.org/wiki/Q1921621","display_name":"Merge (version control)","level":2,"score":0.5433358550071716},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5270987749099731},{"id":"https://openalex.org/C140086265","wikidata":"https://www.wikidata.org/wiki/Q11341754","display_name":"Merge algorithm","level":4,"score":0.5002062320709229},{"id":"https://openalex.org/C67388219","wikidata":"https://www.wikidata.org/wiki/Q207440","display_name":"Hash table","level":3,"score":0.47171658277511597},{"id":"https://openalex.org/C157692150","wikidata":"https://www.wikidata.org/wiki/Q2919848","display_name":"Query optimization","level":2,"score":0.43132826685905457},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3446025848388672},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.2222103774547577},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.1978238821029663},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3568027","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3568027","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3568027","source":{"id":"https://openalex.org/S90119964","display_name":"ACM Transactions on Database Systems","issn_l":"0362-5915","issn":["0362-5915","1557-4644"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Database Systems","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3568027","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3568027","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3568027","source":{"id":"https://openalex.org/S90119964","display_name":"ACM Transactions on Database Systems","issn_l":"0362-5915","issn":["0362-5915","1557-4644"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Database Systems","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4308883303.pdf","grobid_xml":"https://content.openalex.org/works/W4308883303.grobid-xml"},"referenced_works_count":29,"referenced_works":["https://openalex.org/W1605782097","https://openalex.org/W1739920298","https://openalex.org/W1972115340","https://openalex.org/W1972754280","https://openalex.org/W1974796034","https://openalex.org/W1981420413","https://openalex.org/W2020191321","https://openalex.org/W2058954251","https://openalex.org/W2063259549","https://openalex.org/W2067089961","https://openalex.org/W2068739275","https://openalex.org/W2095891890","https://openalex.org/W2104242181","https://openalex.org/W2106887953","https://openalex.org/W2147722789","https://openalex.org/W2151203234","https://openalex.org/W2153329411","https://openalex.org/W2158237121","https://openalex.org/W2169486917","https://openalex.org/W2173213060","https://openalex.org/W2326587081","https://openalex.org/W2474900888","https://openalex.org/W2901608006","https://openalex.org/W3023647491","https://openalex.org/W3137759927","https://openalex.org/W3197796305","https://openalex.org/W4249653116","https://openalex.org/W4299415997","https://openalex.org/W4300868310"],"related_works":["https://openalex.org/W4320925713","https://openalex.org/W1997715509","https://openalex.org/W2925598649","https://openalex.org/W2190513794","https://openalex.org/W19461966","https://openalex.org/W2595864772","https://openalex.org/W2511099490","https://openalex.org/W4389428824","https://openalex.org/W3014215895","https://openalex.org/W2140238930"],"abstract_inverted_index":{"Database":[0],"query":[1,48,144],"processing":[2],"requires":[3,22],"algorithms":[4,12],"for":[5,73,82,120,134,159,192],"duplicate":[6,161],"removal,":[7,162],"grouping,":[8,163],"and":[9,32,67,70,99,109,113,164,179],"aggregation.":[10,165],"Three":[11],"exist:":[13],"in-stream":[14],"aggregation":[15,26,34,79,107,190],"is":[16,80,138],"most":[17],"efficient":[18],"by":[19],"far":[20],"but":[21],"sorted":[23,74,119,205],"input;":[24],"sort-based":[25,160,181],"relies":[27,35],"on":[28,36,56],"external":[29],"merge":[30,127],"sort;":[31],"hash":[33,39,42],"an":[37],"in-memory":[38],"table":[40],"plus":[41],"partitioning":[43],"to":[44,53,117,147],"temporary":[45],"storage.":[46],"Cost-based":[47],"optimization":[49],"chooses":[50],"which":[51],"algorithm":[52,149,158,168,191,198,203,219],"use":[54],"based":[55],"several":[57],"factors,":[58],"including":[59],"the":[60,64,71,86,96,114,130,201,217],"sort":[61],"order":[62],"of":[63,92,226],"input,":[65],"input":[66,98,108],"output":[68,83,110,115,206],"sizes,":[69],"need":[72],"output.":[75],"For":[76],"example,":[77],"hash-based":[78,178],"ideal":[81],"smaller":[84],"than":[85],"available":[87],"memory":[88],"(e.g.,":[89],"Query":[90,215],"1":[91],"TPC-H),":[93],"whereas":[94],"sorting":[95,102],"entire":[97],"aggregating":[100],"after":[101],"are":[103,111],"preferable":[104],"when":[105],"both":[106,176],"large":[112],"needs":[116],"be":[118],"a":[121,126,135,156,187],"subsequent":[122,211],"operation":[123],"such":[124],"as":[125,173,175,186],"join.":[128],"Unfortunately,":[129],"size":[131],"information":[132],"required":[133],"sound":[136],"choice":[137],"often":[139],"inaccurate":[140],"or":[141],"unavailable":[142],"during":[143],"optimization,":[145],"leading":[146],"sub-optimal":[148],"choices.":[150,199],"In":[151],"response,":[152],"this":[153],"article":[154],"introduces":[155],"new":[157,167,202,218],"The":[166],"always":[169],"performs":[170],"at":[171],"least":[172],"well":[174],"traditional":[177,180],"algorithms.":[182],"It":[183],"can":[184,208],"serve":[185],"system\u2019s":[188],"only":[189],"unsorted":[193],"inputs,":[194],"thus":[195],"preventing":[196],"erroneous":[197],"Furthermore,":[200],"produces":[204],"that":[207,223],"speed":[209],"up":[210],"operations.":[212],"Google\u2019s":[213],"F1":[214],"uses":[216],"in":[220],"production":[221],"workloads":[222],"aggregate":[224],"petabytes":[225],"data":[227],"every":[228],"day.":[229]},"counts_by_year":[{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":3}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
