{"id":"https://openalex.org/W2141267666","doi":"https://doi.org/10.1145/1807128.1807140","title":"Skew-resistant parallel processing of feature-extracting scientific user-defined functions","display_name":"Skew-resistant parallel processing of feature-extracting scientific user-defined functions","publication_year":2010,"publication_date":"2010-06-10","ids":{"openalex":"https://openalex.org/W2141267666","doi":"https://doi.org/10.1145/1807128.1807140","mag":"2141267666"},"language":"en","primary_location":{"id":"doi:10.1145/1807128.1807140","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1807128.1807140","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 1st ACM symposium on Cloud computing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102246987","display_name":"YongChul Kwon","orcid":"https://orcid.org/0009-0002-8525-2735"},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"YongChul Kwon","raw_affiliation_strings":["University of Washington, Seattle, WA, USA"],"affiliations":[{"raw_affiliation_string":"University of Washington, Seattle, WA, USA","institution_ids":["https://openalex.org/I201448701"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064988079","display_name":"Magdalena Ba\u0142azi\u0144ska","orcid":"https://orcid.org/0000-0002-6805-0325"},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Magdalena Balazinska","raw_affiliation_strings":["University of Washington, Seattle, WA, USA"],"affiliations":[{"raw_affiliation_string":"University of Washington, Seattle, WA, USA","institution_ids":["https://openalex.org/I201448701"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007124763","display_name":"Bill Howe","orcid":"https://orcid.org/0000-0001-8588-8472"},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bill Howe","raw_affiliation_strings":["University of Washington, Seattle, WA, USA"],"affiliations":[{"raw_affiliation_string":"University of Washington, Seattle, WA, USA","institution_ids":["https://openalex.org/I201448701"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5108674089","display_name":"Jerome Rolia","orcid":null},"institutions":[{"id":"https://openalex.org/I1324840837","display_name":"Hewlett-Packard (United States)","ror":"https://ror.org/059rn9488","country_code":"US","type":"company","lineage":["https://openalex.org/I1324840837"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jerome Rolia","raw_affiliation_strings":["HP Labs, Bristol, UNK, United Kingdom","HP Labs, Bristol, UNK, United Kingdom#TAB#"],"affiliations":[{"raw_affiliation_string":"HP Labs, Bristol, UNK, United Kingdom","institution_ids":[]},{"raw_affiliation_string":"HP Labs, Bristol, UNK, United Kingdom#TAB#","institution_ids":["https://openalex.org/I1324840837"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5102246987"],"corresponding_institution_ids":["https://openalex.org/I201448701"],"apc_list":null,"apc_paid":null,"fwci":32.1674,"has_fulltext":false,"cited_by_count":128,"citation_normalized_percentile":{"value":0.99584451,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"75","last_page":"86"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8345053195953369},{"id":"https://openalex.org/keywords/skew","display_name":"Skew","score":0.7264629602432251},{"id":"https://openalex.org/keywords/parameterized-complexity","display_name":"Parameterized complexity","score":0.6278054714202881},{"id":"https://openalex.org/keywords/analytics","display_name":"Analytics","score":0.5996621251106262},{"id":"https://openalex.org/keywords/partition","display_name":"Partition (number theory)","score":0.48715874552726746},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.4643018841743469},{"id":"https://openalex.org/keywords/factor","display_name":"Factor (programming language)","score":0.42983928322792053},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.4150419533252716},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3995862603187561},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.39347517490386963},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3641000986099243},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.20365819334983826},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.12054044008255005}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8345053195953369},{"id":"https://openalex.org/C43711488","wikidata":"https://www.wikidata.org/wiki/Q7534783","display_name":"Skew","level":2,"score":0.7264629602432251},{"id":"https://openalex.org/C165464430","wikidata":"https://www.wikidata.org/wiki/Q1570441","display_name":"Parameterized complexity","level":2,"score":0.6278054714202881},{"id":"https://openalex.org/C79158427","wikidata":"https://www.wikidata.org/wiki/Q485396","display_name":"Analytics","level":2,"score":0.5996621251106262},{"id":"https://openalex.org/C42812","wikidata":"https://www.wikidata.org/wiki/Q1082910","display_name":"Partition (number theory)","level":2,"score":0.48715874552726746},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.4643018841743469},{"id":"https://openalex.org/C2781039887","wikidata":"https://www.wikidata.org/wiki/Q1391724","display_name":"Factor (programming language)","level":2,"score":0.42983928322792053},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.4150419533252716},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3995862603187561},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.39347517490386963},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3641000986099243},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.20365819334983826},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.12054044008255005},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/1807128.1807140","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1807128.1807140","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 1st ACM symposium on Cloud computing","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.207.3188","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.207.3188","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cs.washington.edu/homes/yongchul/papers/socc2010.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.225.9198","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.225.9198","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cs.washington.edu/homes/magda/papers/kwon-socc10.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320312143","display_name":"National Centre for Supercomputing Applications","ror":"https://ror.org/03r10zj06"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W157967172","https://openalex.org/W1493893823","https://openalex.org/W1520750340","https://openalex.org/W1570320352","https://openalex.org/W1597532222","https://openalex.org/W1964374591","https://openalex.org/W1984872340","https://openalex.org/W1995180519","https://openalex.org/W1997020216","https://openalex.org/W2017540033","https://openalex.org/W2019485818","https://openalex.org/W2024680396","https://openalex.org/W2032469582","https://openalex.org/W2037027334","https://openalex.org/W2042956136","https://openalex.org/W2044781714","https://openalex.org/W2082820126","https://openalex.org/W2091842814","https://openalex.org/W2098935637","https://openalex.org/W2100830825","https://openalex.org/W2104680817","https://openalex.org/W2111516947","https://openalex.org/W2113671562","https://openalex.org/W2114103324","https://openalex.org/W2114303224","https://openalex.org/W2117466692","https://openalex.org/W2121253712","https://openalex.org/W2122465391","https://openalex.org/W2141709306","https://openalex.org/W2151487638","https://openalex.org/W2156441202","https://openalex.org/W2171668176","https://openalex.org/W2503455548","https://openalex.org/W2766000922","https://openalex.org/W2990223878","https://openalex.org/W3098093324","https://openalex.org/W4206606839","https://openalex.org/W6635650928","https://openalex.org/W6673643799"],"related_works":["https://openalex.org/W2051058708","https://openalex.org/W154868527","https://openalex.org/W1494268238","https://openalex.org/W1983207144","https://openalex.org/W2490706771","https://openalex.org/W2480116122","https://openalex.org/W1976468483","https://openalex.org/W1516574938","https://openalex.org/W4387423656","https://openalex.org/W4389136702"],"abstract_inverted_index":{"Scientists":[0],"today":[1],"have":[2],"the":[3,32,56,81,128,131,148],"ability":[4],"to":[5,22,27,43,117,146,151,177,180],"generate":[6],"data":[7,24,61,150,158],"at":[8],"an":[9,135],"unprecedented":[10],"scale":[11],"and":[12,93,98,123],"rate":[13],"and,":[14],"as":[15,63],"a":[16,65,71,106,173,181],"result,":[17],"they":[18],"must":[19],"increasingly":[20],"turn":[21],"parallel":[23],"processing":[25],"engines":[26,38],"perform":[28],"their":[29],"analyses.":[30],"However,":[31],"simple":[33],"execution":[34,170],"model":[35],"of":[36,58,83,112,130,175],"these":[37],"can":[39,94,168],"make":[40],"it":[41],"difficult":[42],"implement":[44],"efficient":[45],"algorithms":[46],"for":[47],"scientific":[48,53],"analytics.":[49],"In":[50,100],"particular,":[51],"many":[52],"analytics":[54],"require":[55],"extraction":[57,121],"features":[59],"from":[60,159],"represented":[62],"either":[64],"multidimensional":[66,72],"array":[67],"or":[68],"points":[69],"in":[70],"space.":[73],"These":[74],"applications":[75],"exhibit":[76],"significant":[77],"computational":[78,153],"skew,":[79],"where":[80],"runtime":[82],"different":[84,161],"partitions":[85],"depends":[86],"on":[87,110,156],"more":[88],"than":[89],"just":[90],"input":[91,149],"size":[92],"therefore":[95],"vary":[96],"dramatically":[97],"unpredictably.":[99],"this":[101],"paper,":[102],"we":[103],"present":[104],"SkewReduce,":[105],"new":[107],"system":[108,133],"implemented":[109],"top":[111],"Hadoop":[113],"that":[114,142,165],"enables":[115],"users":[116],"easily":[118],"express":[119],"feature":[120],"analyses":[122],"execute":[124],"them":[125],"efficiently.":[126],"At":[127],"heart":[129],"SkewReduce":[132],"is":[134],"optimizer,":[136],"parameterized":[137],"by":[138,172],"user-defined":[139],"cost":[140],"functions,":[141],"determines":[143],"how":[144],"best":[145],"partition":[147],"minimize":[152],"skew.":[154],"Experiments":[155],"real":[157],"two":[160],"science":[162],"domains":[163],"demonstrate":[164],"our":[166],"approach":[167],"improve":[169],"times":[171],"factor":[174],"up":[176],"8":[178],"compared":[179],"naive":[182],"implementation.":[183]},"counts_by_year":[{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":7},{"year":2019,"cited_by_count":6},{"year":2018,"cited_by_count":15},{"year":2017,"cited_by_count":13},{"year":2016,"cited_by_count":9},{"year":2015,"cited_by_count":12},{"year":2014,"cited_by_count":13},{"year":2013,"cited_by_count":14},{"year":2012,"cited_by_count":14}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
