{"id":"https://openalex.org/W4210382907","doi":"https://doi.org/10.14778/3494124.3494152","title":"Flexible rule-based decomposition and metadata independence in modin","display_name":"Flexible rule-based decomposition and metadata independence in modin","publication_year":2021,"publication_date":"2021-11-01","ids":{"openalex":"https://openalex.org/W4210382907","doi":"https://doi.org/10.14778/3494124.3494152"},"language":"en","primary_location":{"id":"doi:10.14778/3494124.3494152","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3494124.3494152","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5026228186","display_name":"Devin Petersohn","orcid":null},"institutions":[{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]},{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Devin Petersohn","raw_affiliation_strings":["UC Berkeley"],"affiliations":[{"raw_affiliation_string":"UC Berkeley","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110710845","display_name":"Dixin Tang","orcid":"https://orcid.org/0000-0002-3316-6651"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]},{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dixin Tang","raw_affiliation_strings":["UC Berkeley"],"affiliations":[{"raw_affiliation_string":"UC Berkeley","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086773354","display_name":"Rehan Durrani","orcid":null},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]},{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rehan Durrani","raw_affiliation_strings":["UC Berkeley"],"affiliations":[{"raw_affiliation_string":"UC Berkeley","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061196344","display_name":"Areg Melik-Adamyan","orcid":null},"institutions":[{"id":"https://openalex.org/I4210158342","display_name":"Intel (United Kingdom)","ror":"https://ror.org/058cxws58","country_code":"GB","type":"company","lineage":["https://openalex.org/I1343180700","https://openalex.org/I4210158342"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Areg Melik-Adamyan","raw_affiliation_strings":["Intel"],"affiliations":[{"raw_affiliation_string":"Intel","institution_ids":["https://openalex.org/I4210158342"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072427753","display_name":"Joseph E. Gonzalez","orcid":"https://orcid.org/0000-0003-2921-956X"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]},{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Joseph E. Gonzalez","raw_affiliation_strings":["UC Berkeley"],"affiliations":[{"raw_affiliation_string":"UC Berkeley","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017381278","display_name":"Anthony D. Joseph","orcid":"https://orcid.org/0000-0002-6798-9664"},"institutions":[{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]},{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anthony D. Joseph","raw_affiliation_strings":["UC Berkeley"],"affiliations":[{"raw_affiliation_string":"UC Berkeley","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5013608601","display_name":"Aditya Parameswaran","orcid":"https://orcid.org/0000-0002-4538-4752"},"institutions":[{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]},{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aditya G. Parameswaran","raw_affiliation_strings":["UC Berkeley"],"affiliations":[{"raw_affiliation_string":"UC Berkeley","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5026228186"],"corresponding_institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"],"apc_list":null,"apc_paid":null,"fwci":1.5281,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.84070357,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":100},"biblio":{"volume":"15","issue":"3","first_page":"739","last_page":"751"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9918000102043152,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9902999997138977,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.9208759069442749},{"id":"https://openalex.org/keywords/independence","display_name":"Independence (probability theory)","score":0.7631325721740723},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7546844482421875},{"id":"https://openalex.org/keywords/toolbox","display_name":"Toolbox","score":0.6988035440444946},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.6024873852729797},{"id":"https://openalex.org/keywords/decomposition","display_name":"Decomposition","score":0.5886251926422119},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.43661150336265564},{"id":"https://openalex.org/keywords/metadata-repository","display_name":"Metadata repository","score":0.4249113202095032},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.42395079135894775},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.4132442772388458},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.39380520582199097},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.21303194761276245},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.19160184264183044},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10633489489555359}],"concepts":[{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.9208759069442749},{"id":"https://openalex.org/C35651441","wikidata":"https://www.wikidata.org/wiki/Q625303","display_name":"Independence (probability theory)","level":2,"score":0.7631325721740723},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7546844482421875},{"id":"https://openalex.org/C2777655017","wikidata":"https://www.wikidata.org/wiki/Q1501161","display_name":"Toolbox","level":2,"score":0.6988035440444946},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.6024873852729797},{"id":"https://openalex.org/C124681953","wikidata":"https://www.wikidata.org/wiki/Q339062","display_name":"Decomposition","level":2,"score":0.5886251926422119},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.43661150336265564},{"id":"https://openalex.org/C153048206","wikidata":"https://www.wikidata.org/wiki/Q3454922","display_name":"Metadata repository","level":3,"score":0.4249113202095032},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.42395079135894775},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.4132442772388458},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.39380520582199097},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.21303194761276245},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.19160184264183044},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10633489489555359},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.14778/3494124.3494152","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3494124.3494152","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.6100000143051147,"display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W1605782097","https://openalex.org/W1791587242","https://openalex.org/W1967091776","https://openalex.org/W1981420413","https://openalex.org/W2014830756","https://openalex.org/W2038412523","https://openalex.org/W2065961934","https://openalex.org/W2068410942","https://openalex.org/W2074935284","https://openalex.org/W2125775320","https://openalex.org/W2142184646","https://openalex.org/W2168903001","https://openalex.org/W2169251528","https://openalex.org/W2394680079","https://openalex.org/W2430301697","https://openalex.org/W2527672088","https://openalex.org/W2534983812","https://openalex.org/W2574839832","https://openalex.org/W2582743722","https://openalex.org/W2780624097","https://openalex.org/W2888965704","https://openalex.org/W3008534594","https://openalex.org/W3085940077","https://openalex.org/W3086179797","https://openalex.org/W3135035073","https://openalex.org/W3135575949","https://openalex.org/W3147342876","https://openalex.org/W4312258136","https://openalex.org/W6678708722","https://openalex.org/W6816811543"],"related_works":["https://openalex.org/W1552553528","https://openalex.org/W2183628870","https://openalex.org/W3023161639","https://openalex.org/W2008531296","https://openalex.org/W2782431616","https://openalex.org/W2379265733","https://openalex.org/W2394393789","https://openalex.org/W2374379029","https://openalex.org/W1503116306","https://openalex.org/W4299935056"],"abstract_inverted_index":{"Dataframes":[0],"have":[1],"become":[2],"universally":[3],"popular":[4],"as":[5,38,109],"a":[6,20,59,79],"means":[7],"to":[8,46,106,135,159],"represent":[9],"data":[10,31],"in":[11,29,98],"various":[12],"stages":[13],"of":[14,23,82],"structure,":[15],"and":[16,70,111,119,125,142,149],"manipulate":[17],"it":[18],"using":[19],"rich":[21],"set":[22,81],"operators---thereby":[24],"becoming":[25],"an":[26],"essential":[27],"tool":[28],"the":[30,116],"scientists'":[32],"toolbox.":[33],"However,":[34],"dataframe":[35,61],"systems,":[36],"such":[37,161],"pandas,":[39],"scale":[40],"poorly---and":[41],"are":[42,85,157],"non-interactive":[43],"on":[44,144],"moderate":[45],"large":[47,146],"datasets.":[48],"We":[49,101],"discuss":[50],"our":[51,55],"experiences":[52],"developing":[53],"Modin,":[54],"first":[56],"cut":[57],"at":[58],"parallel":[60],"system,":[62],"which":[63],"already":[64],"has":[65],"users":[66],"across":[67,139],"several":[68],"industries":[69],"over":[71],"1M":[72],"downloads.":[73],"Modin":[74,132],"translates":[75],"pandas":[76,137],"functions":[77],"into":[78],"core":[80],"operators":[83],"that":[84,95,152],"individually":[86],"parallelized":[87],"via":[88],"columnar,":[89],"row-wise,":[90],"or":[91,156],"cell-wise":[92],"decomposition":[93,124],"rules":[94],"we":[96],"formalize":[97],"this":[99],"paper.":[100],"also":[102,164],"introduce":[103],"metadata":[104,126],"independence":[105],"allow":[107],"metadata---such":[108],"order":[110],"type---to":[112],"be":[113],"decoupled":[114],"from":[115],"physical":[117],"representation":[118],"maintained":[120],"lazily.":[121],"Using":[122],"rule-based":[123],"independence,":[127],"along":[128],"with":[129],"careful":[130],"engineering,":[131],"is":[133],"able":[134],"support":[136,160],"operations":[138],"both":[140],"rows":[141],"columns":[143],"very":[145],"dataframes---unlike":[147],"Koalas":[148],"Dask":[150],"DataFrames":[151],"either":[153],"break":[154],"down":[155],"unable":[158],"operations,":[162],"while":[163],"being":[165],"much":[166],"faster":[167],"than":[168],"pandas.":[169]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":4}],"updated_date":"2026-02-25T08:12:03.925757","created_date":"2025-10-10T00:00:00"}
