{"id":"https://openalex.org/W2075620950","doi":"https://doi.org/10.1145/1739041.1739056","title":"Optimizing joins in a map-reduce environment","display_name":"Optimizing joins in a map-reduce environment","publication_year":2010,"publication_date":"2010-03-16","ids":{"openalex":"https://openalex.org/W2075620950","doi":"https://doi.org/10.1145/1739041.1739056","mag":"2075620950"},"language":"en","primary_location":{"id":"doi:10.1145/1739041.1739056","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1739041.1739056","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 13th International Conference on Extending Database Technology","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://doi.org/10.1145/1739041.1739056","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088568415","display_name":"Foto Afrati","orcid":null},"institutions":[{"id":"https://openalex.org/I174458059","display_name":"National Technical University of Athens","ror":"https://ror.org/03cx6bg69","country_code":"GR","type":"education","lineage":["https://openalex.org/I174458059"]}],"countries":["GR"],"is_corresponding":true,"raw_author_name":"Foto N. Afrati","raw_affiliation_strings":["National Technical University of Athens, Greece",", National Technical University of Athens, Greece"],"affiliations":[{"raw_affiliation_string":"National Technical University of Athens, Greece","institution_ids":["https://openalex.org/I174458059"]},{"raw_affiliation_string":", National Technical University of Athens, Greece","institution_ids":["https://openalex.org/I174458059"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5089065417","display_name":"Jeffrey D. Ullman","orcid":"https://orcid.org/0000-0002-1847-3426"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jeffrey D. Ullman","raw_affiliation_strings":["Stanford University"],"affiliations":[{"raw_affiliation_string":"Stanford University","institution_ids":["https://openalex.org/I97018004"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5088568415"],"corresponding_institution_ids":["https://openalex.org/I174458059"],"apc_list":null,"apc_paid":null,"fwci":37.9353,"has_fulltext":false,"cited_by_count":331,"citation_normalized_percentile":{"value":0.9988217,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"99","last_page":"110"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/joins","display_name":"Joins","score":0.8516700267791748},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7959668040275574},{"id":"https://openalex.org/keywords/tuple","display_name":"Tuple","score":0.6978991031646729},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5959966778755188},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.5936635732650757},{"id":"https://openalex.org/keywords/identifier","display_name":"Identifier","score":0.5660618543624878},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5076802968978882},{"id":"https://openalex.org/keywords/table","display_name":"Table (database)","score":0.45642539858818054},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.42184463143348694},{"id":"https://openalex.org/keywords/replication","display_name":"Replication (statistics)","score":0.41673752665519714},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.39079296588897705},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.2837197780609131},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.13265395164489746}],"concepts":[{"id":"https://openalex.org/C2778692605","wikidata":"https://www.wikidata.org/wiki/Q4041866","display_name":"Joins","level":2,"score":0.8516700267791748},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7959668040275574},{"id":"https://openalex.org/C118930307","wikidata":"https://www.wikidata.org/wiki/Q600590","display_name":"Tuple","level":2,"score":0.6978991031646729},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5959966778755188},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.5936635732650757},{"id":"https://openalex.org/C154504017","wikidata":"https://www.wikidata.org/wiki/Q853614","display_name":"Identifier","level":2,"score":0.5660618543624878},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5076802968978882},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.45642539858818054},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.42184463143348694},{"id":"https://openalex.org/C12590798","wikidata":"https://www.wikidata.org/wiki/Q3933199","display_name":"Replication (statistics)","level":2,"score":0.41673752665519714},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.39079296588897705},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.2837197780609131},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.13265395164489746},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":7,"locations":[{"id":"doi:10.1145/1739041.1739056","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1739041.1739056","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 13th International Conference on Extending Database Technology","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.158.3656","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.158.3656","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://infolab.stanford.edu/~ullman/pub/join-mr.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.329.9163","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.329.9163","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.edbt.org/Proceedings/2010-Lausanne/edbt/papers/p0099-Afrati.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.365.5746","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.365.5746","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://ilpubs.stanford.edu:8090/957/1/mapred-join-report.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.594.1703","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.594.1703","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cs.bgu.ac.il/~frankel/HonoraryDay09/Slides/JeffPaper.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.644.4077","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.644.4077","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://ilpubs.stanford.edu:8090/952/1/mapred-join-report.pdf","raw_type":"text"},{"id":"pmh:oai:dspace.lib.ntua.gr:123456789/32931","is_oa":true,"landing_page_url":"http://doi.org/10.1145/1739041.1739056","pdf_url":null,"source":{"id":"https://openalex.org/S4377196837","display_name":"DSpace - NTUA (National Technical University of Athens)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I174458059","host_organization_name":"National Technical University of Athens","host_organization_lineage":["https://openalex.org/I174458059"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Advances in Database Technology - EDBT 2010 - 13th International Conference on Extending Database Technology, Proceedings","raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":{"id":"pmh:oai:dspace.lib.ntua.gr:123456789/32931","is_oa":true,"landing_page_url":"http://doi.org/10.1145/1739041.1739056","pdf_url":null,"source":{"id":"https://openalex.org/S4377196837","display_name":"DSpace - NTUA (National Technical University of Athens)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I174458059","host_organization_name":"National Technical University of Athens","host_organization_lineage":["https://openalex.org/I174458059"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Advances in Database Technology - EDBT 2010 - 13th International Conference on Extending Database Technology, Proceedings","raw_type":"info:eu-repo/semantics/conferenceObject"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/15","display_name":"Life in Land","score":0.49000000953674316}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W1569403765","https://openalex.org/W1597532222","https://openalex.org/W1974047326","https://openalex.org/W1981420413","https://openalex.org/W1992810975","https://openalex.org/W2000081322","https://openalex.org/W2066636486","https://openalex.org/W2075620950","https://openalex.org/W2098935637","https://openalex.org/W2115613157","https://openalex.org/W2119565742","https://openalex.org/W2127489049","https://openalex.org/W2132244350","https://openalex.org/W2135196372","https://openalex.org/W2135499008","https://openalex.org/W2138621811","https://openalex.org/W2140102533","https://openalex.org/W2150013559","https://openalex.org/W2154894831","https://openalex.org/W2168009816","https://openalex.org/W2171668176","https://openalex.org/W2173213060","https://openalex.org/W2201552871","https://openalex.org/W2203361072","https://openalex.org/W2501499209","https://openalex.org/W4233762723","https://openalex.org/W4238584892","https://openalex.org/W4253103473","https://openalex.org/W6635650928","https://openalex.org/W6684827971"],"related_works":["https://openalex.org/W2088925915","https://openalex.org/W2016456293","https://openalex.org/W2161128265","https://openalex.org/W1589812061","https://openalex.org/W2138101384","https://openalex.org/W2140894225","https://openalex.org/W2108524022","https://openalex.org/W4212828571","https://openalex.org/W2125826941","https://openalex.org/W2349616416"],"abstract_inverted_index":{"Implementations":[0],"of":[1,35,54,64,76,79,93,113,121,186],"map-reduce":[2,23,188],"are":[3,70,104,158,193],"being":[4],"used":[5],"to":[6,42,72,160,182,189],"perform":[7],"many":[8],"operations":[9],"on":[10,96],"very":[11,211],"large":[12,212],"data.":[13],"We":[14,109],"examine":[15],"strategies":[16],"for":[17,99,126],"joining":[18],"several":[19],"relations":[20],"in":[21,88,137,208],"the":[22,31,33,39,55,62,77,91,97,111,115,138,162,166,170,174,183,233],"environment.":[24],"Our":[25],"new":[26],"approach":[27],"begins":[28],"by":[29],"identifying":[30],"\"map-key,\"":[32],"set":[34],"attributes":[36,102],"that":[37,103,168],"identify":[38],"Reduce":[40,81,122],"process":[41,46],"which":[43,60,67,209],"a":[44,49,58,74,80,118,210,236],"Map":[45],"must":[47],"send":[48],"particular":[50],"tuple.":[51],"Each":[52],"attribute":[53,133],"map-key":[56,101,139,163],"gets":[57],"\"share,\"":[59],"is":[61,134,140,178,215],"number":[63,120],"buckets":[65],"into":[66],"its":[68],"values":[69],"hashed,":[71],"form":[73],"component":[75],"identifier":[78],"process.":[82],"Relations":[83],"have":[84],"their":[85,107],"tuples":[86],"replicated":[87],"limited":[89],"fashion,":[90],"degree":[92],"replication":[94],"depending":[95],"shares":[98,167],"those":[100],"missing":[105],"from":[106],"schema.":[108],"study":[110],"problem":[112],"optimizing":[114],"shares,":[116],"given":[117],"fixed":[119],"processes.":[123],"An":[124],"algorithm":[125],"detecting":[127],"and":[128,151,164,221],"fixing":[129],"problems":[130],"where":[131,200],"an":[132],"\"mistakenly\"":[135],"included":[136],"given.":[141],"Then,":[142],"we":[143,157,176],"consider":[144],"two":[145],"important":[146,195],"special":[147],"cases:":[148],"chain":[149],"joins":[150],"star":[152],"joins.":[153],"In":[154],"each":[155],"case":[156],"able":[159],"determine":[161,165],"yield":[169],"least":[171],"replication.":[172],"While":[173],"method":[175,202],"propose":[177],"not":[179],"always":[180],"superior":[181],"conventional":[184],"way":[185],"using":[187],"implement":[190],"joins,":[191],"there":[192],"some":[194],"cases":[196],"involving":[197,224],"large-scale":[198],"data":[199],"our":[201],"wins,":[203],"including:":[204],"(1)":[205],"analytic":[206],"queries":[207,223],"fact":[213],"table":[214],"joined":[216],"with":[217,228],"smaller":[218],"dimension":[219],"tables,":[220],"(2)":[222],"paths":[225],"through":[226],"graphs":[227],"high":[229],"out-degree,":[230],"such":[231],"as":[232],"Web":[234],"or":[235],"social":[237],"network.":[238]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":7},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":12},{"year":2019,"cited_by_count":20},{"year":2018,"cited_by_count":22},{"year":2017,"cited_by_count":27},{"year":2016,"cited_by_count":34},{"year":2015,"cited_by_count":37},{"year":2014,"cited_by_count":56},{"year":2013,"cited_by_count":38},{"year":2012,"cited_by_count":31}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
