{"id":"https://openalex.org/W2899824649","doi":"https://doi.org/10.1109/tpds.2018.2880189","title":"Wide-Area Spark Streaming: Automated Routing and Batch Sizing","display_name":"Wide-Area Spark Streaming: Automated Routing and Batch Sizing","publication_year":2018,"publication_date":"2018-11-09","ids":{"openalex":"https://openalex.org/W2899824649","doi":"https://doi.org/10.1109/tpds.2018.2880189","mag":"2899824649"},"language":"en","primary_location":{"id":"doi:10.1109/tpds.2018.2880189","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2018.2880189","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100397204","display_name":"Wenxin Li","orcid":"https://orcid.org/0000-0001-8495-5821"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wenxin Li","raw_affiliation_strings":["School of Computer Science and Technology, Dalian University of Technology, Dalian, Liaoning, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Dalian University of Technology, Dalian, Liaoning, China","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032424832","display_name":"Di Niu","orcid":"https://orcid.org/0000-0002-5250-7327"},"institutions":[{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Di Niu","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Alberta, Edmonton, AB, Canada"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Alberta, Edmonton, AB, Canada","institution_ids":["https://openalex.org/I154425047"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110804515","display_name":"Yinan Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Yinan Liu","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Toronto, ON, Canada"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Toronto, ON, Canada","institution_ids":["https://openalex.org/I185261750"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100759864","display_name":"Shuhao Liu","orcid":"https://orcid.org/0000-0002-4892-0979"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Shuhao Liu","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Toronto, ON, Canada"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Toronto, ON, Canada","institution_ids":["https://openalex.org/I185261750"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5083596391","display_name":"Baochun Li","orcid":"https://orcid.org/0000-0003-2404-0974"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Baochun Li","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Toronto, ON, Canada"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Toronto, ON, Canada","institution_ids":["https://openalex.org/I185261750"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100397204"],"corresponding_institution_ids":["https://openalex.org/I27357992"],"apc_list":null,"apc_paid":null,"fwci":1.2917,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.829914,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"30","issue":"6","first_page":"1434","last_page":"1448"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8922477960586548},{"id":"https://openalex.org/keywords/spark","display_name":"SPARK (programming language)","score":0.6373714208602905},{"id":"https://openalex.org/keywords/stream-processing","display_name":"Stream processing","score":0.6273273229598999},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.6041954755783081},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.592960774898529},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.5416508316993713},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.5016696453094482},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.43354368209838867},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.29051852226257324},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.25319844484329224},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.11684319376945496}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8922477960586548},{"id":"https://openalex.org/C2781215313","wikidata":"https://www.wikidata.org/wiki/Q3493345","display_name":"SPARK (programming language)","level":2,"score":0.6373714208602905},{"id":"https://openalex.org/C107027933","wikidata":"https://www.wikidata.org/wiki/Q2006448","display_name":"Stream processing","level":2,"score":0.6273273229598999},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.6041954755783081},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.592960774898529},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.5416508316993713},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5016696453094482},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.43354368209838867},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.29051852226257324},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.25319844484329224},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.11684319376945496},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tpds.2018.2880189","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2018.2880189","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W1230574503","https://openalex.org/W1249613807","https://openalex.org/W1510496002","https://openalex.org/W1534130940","https://openalex.org/W1969299781","https://openalex.org/W1976821017","https://openalex.org/W1984220394","https://openalex.org/W2004255221","https://openalex.org/W2010801412","https://openalex.org/W2035309843","https://openalex.org/W2086392024","https://openalex.org/W2096092966","https://openalex.org/W2098395496","https://openalex.org/W2107861471","https://openalex.org/W2111495242","https://openalex.org/W2114103324","https://openalex.org/W2131975293","https://openalex.org/W2135046866","https://openalex.org/W2141684031","https://openalex.org/W2151249000","https://openalex.org/W2153972927","https://openalex.org/W2169070268","https://openalex.org/W2169466806","https://openalex.org/W2267569004","https://openalex.org/W2496080211","https://openalex.org/W2521236993","https://openalex.org/W2556111255","https://openalex.org/W2562897376","https://openalex.org/W2604783387","https://openalex.org/W2745136502","https://openalex.org/W2762397184","https://openalex.org/W2767447792","https://openalex.org/W2872933037","https://openalex.org/W2913854892","https://openalex.org/W4292363360","https://openalex.org/W6628075071","https://openalex.org/W6628088091","https://openalex.org/W6630529663","https://openalex.org/W6632020100","https://openalex.org/W6675036873","https://openalex.org/W6679815717","https://openalex.org/W6731119795","https://openalex.org/W6736413256"],"related_works":["https://openalex.org/W2160425906","https://openalex.org/W1882733036","https://openalex.org/W1271724408","https://openalex.org/W117162150","https://openalex.org/W2546696010","https://openalex.org/W1992741870","https://openalex.org/W3131615421","https://openalex.org/W2109998134","https://openalex.org/W3130219819","https://openalex.org/W2024953199"],"abstract_inverted_index":{"Modern":[0],"stream":[1,16],"processing":[2,17,43],"frameworks,":[3],"such":[4,19],"as":[5,20,200],"Spark":[6,81,154,205],"Streaming,":[7],"are":[8,52],"designed":[9,54],"to":[10,32,55,65,84,114,136,151,202],"support":[11,137],"a":[12,39,110,117,133,144,158],"wide":[13],"variety":[14],"of":[15,30,78,140,190,196],"applications,":[18],"real-time":[21],"data":[22,31,91],"analytics":[23],"in":[24,101],"social":[25],"networks.":[26,103],"As":[27,157],"the":[28,74,152,194,203],"volume":[29],"be":[33],"processed":[34],"increases":[35],"rapidly,":[36],"there":[37],"is":[38],"pressing":[40],"need":[41],"for":[42],"them":[44],"across":[45,98],"multiple":[46],"geo-distributed":[47,99],"datacenters.":[48],"However,":[49],"these":[50,106],"frameworks":[51],"not":[53],"take":[56,132],"limited":[57],"and":[58,76,86,94,183,208],"varying":[59],"inter-datacenter":[60],"bandwidth":[61,181],"into":[62],"account,":[63],"leading":[64],"longer":[66],"query":[67],"latencies.":[68],"In":[69],"this":[70],"paper,":[71],"we":[72,108,131,148],"present":[73],"design":[75],"implementation":[77,161,175],"an":[79],"extended":[80],"Streaming":[82,155,206],"framework":[83],"automatically":[85],"optimally":[87],"schedule":[88],"tasks,":[89],"select":[90],"flow":[92],"routes":[93],"determine":[95],"micro-batch":[96],"sizes":[97],"datacenters":[100],"wide-area":[102],"To":[104],"make":[105],"decisions,":[107],"propose":[109],"sparsity-regularized":[111],"ADMM":[112],"algorithm":[113],"efficiently":[115],"solve":[116],"nonconvex":[118],"optimization":[119],"problem,":[120],"based":[121],"on":[122,176,187],"readily":[123],"measurable":[124],"operating":[125],"traces.":[126],"Toward":[127],"incremental":[128],"real-world":[129],"deployment,":[130],"non-intrusive":[134],"approach":[135],"flexible":[138],"routing":[139],"micro-batches":[141],"by":[142,166],"adding":[143],"new":[145],"DStream":[146],"transformation":[147],"have":[149,172,192],"developed":[150],"existing":[153,204],"framework.":[156],"result,":[159],"our":[160,174,184,197],"can":[162],"enforce":[163],"scheduling":[164],"decisions":[165],"modifying":[167],"application":[168],"workflows":[169],"only.":[170],"We":[171],"deployed":[173],"Amazon":[177],"EC2":[178],"with":[179],"emulated":[180],"constraints,":[182],"experimental":[185],"results":[186],"various":[188],"types":[189],"queries":[191],"demonstrated":[193],"effectiveness":[195],"proposed":[198],"framework,":[199],"compared":[201],"scheduler":[207],"other":[209],"data-locality-based":[210],"heuristics.":[211]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
