{"id":"https://openalex.org/W2550585696","doi":"https://doi.org/10.1109/tpds.2016.2627558","title":"Leveraging Adaptive I/O to Optimize Collective Data Shuffling Patterns for Big Data Analytics","display_name":"Leveraging Adaptive I/O to Optimize Collective Data Shuffling Patterns for Big Data Analytics","publication_year":2016,"publication_date":"2016-11-10","ids":{"openalex":"https://openalex.org/W2550585696","doi":"https://doi.org/10.1109/tpds.2016.2627558","mag":"2550585696"},"language":"en","primary_location":{"id":"doi:10.1109/tpds.2016.2627558","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2016.2627558","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://inria.hal.science/hal-01531374","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5085745891","display_name":"Bogdan Nicolae","orcid":"https://orcid.org/0000-0002-0661-7509"},"institutions":[{"id":"https://openalex.org/I4210145784","display_name":"IBM Research - Ireland","ror":"https://ror.org/04jnxr720","country_code":"IE","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210145784"]}],"countries":["IE"],"is_corresponding":true,"raw_author_name":"Bogdan Nicolae","raw_affiliation_strings":["IBM Research, Dublin, Ireland"],"affiliations":[{"raw_affiliation_string":"IBM Research, Dublin, Ireland","institution_ids":["https://openalex.org/I4210145784"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102349258","display_name":"Carlos H. \u00c3. Costa","orcid":null},"institutions":[{"id":"https://openalex.org/I4210114115","display_name":"IBM Research - Thomas J. Watson Research Center","ror":"https://ror.org/0265w5591","country_code":"US","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Carlos H. A. Costa","raw_affiliation_strings":["IBM Thomas J. Watson Research Center, Yorktown Heights, NY","IBM Thomas J. Watson Res. Center, Yorktown Heights, NY#TAB#"],"affiliations":[{"raw_affiliation_string":"IBM Thomas J. Watson Research Center, Yorktown Heights, NY","institution_ids":["https://openalex.org/I4210114115"]},{"raw_affiliation_string":"IBM Thomas J. Watson Res. Center, Yorktown Heights, NY#TAB#","institution_ids":["https://openalex.org/I4210114115"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007904705","display_name":"Claudia Misale","orcid":"https://orcid.org/0000-0003-3737-7373"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Claudia Misale","raw_affiliation_strings":["University of Torino, Torino, Italy","University of Torino  Torino Italy"],"affiliations":[{"raw_affiliation_string":"University of Torino, Torino, Italy","institution_ids":[]},{"raw_affiliation_string":"University of Torino  Torino Italy","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000062930","display_name":"Kostas Katrinis","orcid":null},"institutions":[{"id":"https://openalex.org/I4210145784","display_name":"IBM Research - Ireland","ror":"https://ror.org/04jnxr720","country_code":"IE","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210145784"]}],"countries":["IE"],"is_corresponding":false,"raw_author_name":"Kostas Katrinis","raw_affiliation_strings":["IBM Research, Dublin, Ireland"],"affiliations":[{"raw_affiliation_string":"IBM Research, Dublin, Ireland","institution_ids":["https://openalex.org/I4210145784"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082423487","display_name":"Yoonho Park","orcid":"https://orcid.org/0000-0002-9837-0741"},"institutions":[{"id":"https://openalex.org/I4210114115","display_name":"IBM Research - Thomas J. Watson Research Center","ror":"https://ror.org/0265w5591","country_code":"US","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yoonho Park","raw_affiliation_strings":["IBM Thomas J. Watson Research Center, Yorktown Heights, NY","IBM Thomas J. Watson Res. Center, Yorktown Heights, NY#TAB#"],"affiliations":[{"raw_affiliation_string":"IBM Thomas J. Watson Research Center, Yorktown Heights, NY","institution_ids":["https://openalex.org/I4210114115"]},{"raw_affiliation_string":"IBM Thomas J. Watson Res. Center, Yorktown Heights, NY#TAB#","institution_ids":["https://openalex.org/I4210114115"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5085745891"],"corresponding_institution_ids":["https://openalex.org/I4210145784"],"apc_list":null,"apc_paid":null,"fwci":9.9521,"has_fulltext":false,"cited_by_count":24,"citation_normalized_percentile":{"value":0.97997923,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"28","issue":"6","first_page":"1663","last_page":"1674"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8657995462417603},{"id":"https://openalex.org/keywords/shuffling","display_name":"Shuffling","score":0.8127943277359009},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7505018711090088},{"id":"https://openalex.org/keywords/spark","display_name":"SPARK (programming language)","score":0.7228360176086426},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.6661165952682495},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.464780330657959},{"id":"https://openalex.org/keywords/analytics","display_name":"Analytics","score":0.45158371329307556},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.4296642243862152},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.31179511547088623},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.18980160355567932},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.16293790936470032}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8657995462417603},{"id":"https://openalex.org/C167927819","wikidata":"https://www.wikidata.org/wiki/Q1930567","display_name":"Shuffling","level":2,"score":0.8127943277359009},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7505018711090088},{"id":"https://openalex.org/C2781215313","wikidata":"https://www.wikidata.org/wiki/Q3493345","display_name":"SPARK (programming language)","level":2,"score":0.7228360176086426},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.6661165952682495},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.464780330657959},{"id":"https://openalex.org/C79158427","wikidata":"https://www.wikidata.org/wiki/Q485396","display_name":"Analytics","level":2,"score":0.45158371329307556},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4296642243862152},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.31179511547088623},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.18980160355567932},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.16293790936470032},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/tpds.2016.2627558","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2016.2627558","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},{"id":"pmh:oai:HAL:hal-01531374v1","is_oa":true,"landing_page_url":"https://inria.hal.science/hal-01531374","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems, 2017, 28 (6), pp.1663 - 1674. &#x27E8;10.1109/TPDS.2016.2627558&#x27E9;","raw_type":"Journal articles"},{"id":"pmh:oai:iris.unito.it:2318/1624908","is_oa":true,"landing_page_url":"http://hdl.handle.net/2318/1624908","pdf_url":null,"source":{"id":"https://openalex.org/S4306400637","display_name":"Institutional Research Information System University of Turin (University of Turin)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I55143463","host_organization_name":"University of Turin","host_organization_lineage":["https://openalex.org/I55143463"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"pmh:oai:HAL:hal-01531374v1","is_oa":true,"landing_page_url":"https://inria.hal.science/hal-01531374","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems, 2017, 28 (6), pp.1663 - 1674. &#x27E8;10.1109/TPDS.2016.2627558&#x27E9;","raw_type":"Journal articles"},"sustainable_development_goals":[{"score":0.4300000071525574,"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W5211984","https://openalex.org/W1565405831","https://openalex.org/W1572669544","https://openalex.org/W1661634644","https://openalex.org/W1746258828","https://openalex.org/W1756536912","https://openalex.org/W1874851952","https://openalex.org/W1968075755","https://openalex.org/W1974534485","https://openalex.org/W2025669595","https://openalex.org/W2038157364","https://openalex.org/W2058340614","https://openalex.org/W2059628440","https://openalex.org/W2067005380","https://openalex.org/W2071704971","https://openalex.org/W2082047517","https://openalex.org/W2118495646","https://openalex.org/W2126583885","https://openalex.org/W2131513758","https://openalex.org/W2157099238","https://openalex.org/W2157490430","https://openalex.org/W2173213060","https://openalex.org/W2188889839","https://openalex.org/W2295302693","https://openalex.org/W2482213519","https://openalex.org/W2496033776","https://openalex.org/W2620485168","https://openalex.org/W4240086822","https://openalex.org/W4251459357","https://openalex.org/W6600239168","https://openalex.org/W6637756608","https://openalex.org/W6637806892","https://openalex.org/W6687139329","https://openalex.org/W6996886134"],"related_works":["https://openalex.org/W2766461310","https://openalex.org/W4247566972","https://openalex.org/W4388692845","https://openalex.org/W3202731209","https://openalex.org/W3211874991","https://openalex.org/W2989751065","https://openalex.org/W4308507533","https://openalex.org/W2407107767","https://openalex.org/W2901787049","https://openalex.org/W2792181990"],"abstract_inverted_index":{"Big":[0],"data":[1,22,41,52,75,91,106,156,183],"analytics":[2,184],"is":[3,54,77,131,142,148],"an":[4,198],"indispensable":[5],"tool":[6],"in":[7,178],"transforming":[8],"science,":[9],"engineering,":[10],"medicine,":[11],"health-care,":[12],"finance":[13],"and":[14,24,70,107,118,231],"ultimately":[15],"business":[16],"itself.":[17],"With":[18],"the":[19,67,90,94,124,161,168,171,188,209],"explosion":[20],"of":[21,58,126,190],"sizes":[23],"need":[25],"for":[26,123,229],"shorter":[27],"time-to-solution,":[28],"in-memory":[29],"platforms":[30],"such":[31],"as":[32],"Apache":[33],"Spark":[34,211],"gain":[35],"increasing":[36],"popularity.":[37],"In":[38],"this":[39,82,175],"context,":[40],"shuffling,":[42],"a":[43,55,63,78,132,153,180],"particularly":[44],"difficult":[45],"transformation":[46],"pattern,":[47],"introduces":[48],"important":[49],"challenges.":[50],"Specifically,":[51],"shuffling":[53,76,95],"key":[56],"component":[57],"complex":[59],"computations":[60],"that":[61,137,159],"has":[62],"major":[64],"impact":[65],"on":[66,88,197],"overall":[68],"performance":[69,117,222],"scalability.":[71,234],"Thus,":[72],"speeding":[73],"up":[74,217],"critical":[79],"goal.":[80],"To":[81,186],"end,":[83],"state-of-the-art":[84],"solutions":[85],"often":[86],"rely":[87],"overlapping":[89],"transfers":[92],"with":[93,201,208,223],"phase.":[96],"However,":[97],"they":[98],"employ":[99],"simple":[100],"mechanisms":[101],"to":[102,109,115,170,218],"decide":[103],"how":[104],"much":[105],"where":[108],"fetch":[110],"it":[111],"from,":[112],"which":[113],"leads":[114],"sub-optimal":[116],"excessive":[119],"auxiliary":[120],"memory":[121,138,227],"utilization":[122,228],"purpose":[125],"prefetching.":[127],"The":[128],"latter":[129],"aspect":[130],"growing":[133],"concern,":[134],"given":[135],"evidence":[136],"per":[139,205],"computation":[140],"unit":[141],"continuously":[143],"decreasing":[144],"while":[145],"interconnect":[146],"bandwidth":[147],"increasing.":[149],"This":[150],"paper":[151],"contributes":[152],"novel":[154,176],"shuffle":[155,212],"transfer":[157],"strategy":[158,177],"addresses":[160],"two":[162],"aforementioned":[163],"dimensions":[164],"by":[165],"dynamically":[166],"adapting":[167],"prefetching":[169],"computation.":[172],"We":[173],"implemented":[174],"Spark,":[179],"popular":[181],"inmemory":[182],"framework.":[185],"demonstrate":[187],"benefits":[189],"our":[191,214],"proposal,":[192],"we":[193],"run":[194],"extensive":[195],"experiments":[196],"HPC":[199],"cluster":[200],"large":[202],"core":[203],"count":[204],"node.":[206],"Compared":[207],"default":[210],"strategy,":[213],"proposal":[215],"shows:":[216],"40":[219],"percent":[220,225],"better":[221],"50":[224],"less":[226],"buffering":[230],"excellent":[232],"weak":[233]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":6},{"year":2018,"cited_by_count":6},{"year":2017,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
