{"id":"https://openalex.org/W3007341125","doi":"https://doi.org/10.1109/bigdata47090.2019.9006547","title":"Cluster-size optimization within a cloud-based ETL framework for Big Data","display_name":"Cluster-size optimization within a cloud-based ETL framework for Big Data","publication_year":2019,"publication_date":"2019-12-01","ids":{"openalex":"https://openalex.org/W3007341125","doi":"https://doi.org/10.1109/bigdata47090.2019.9006547","mag":"3007341125"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata47090.2019.9006547","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata47090.2019.9006547","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5000303065","display_name":"Eftim Zdravevski","orcid":"https://orcid.org/0000-0001-7664-0168"},"institutions":[{"id":"https://openalex.org/I76245029","display_name":"Saints Cyril and Methodius University of Skopje","ror":"https://ror.org/02wk2vx54","country_code":"MK","type":"education","lineage":["https://openalex.org/I76245029"]}],"countries":["MK"],"is_corresponding":true,"raw_author_name":"Eftim Zdravevski","raw_affiliation_strings":["Faculty of Computer Science and Engineering, Ss Cyril and Methodius University, Skopje, North Macedonia"],"affiliations":[{"raw_affiliation_string":"Faculty of Computer Science and Engineering, Ss Cyril and Methodius University, Skopje, North Macedonia","institution_ids":["https://openalex.org/I76245029"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030441606","display_name":"Petre Lameski","orcid":"https://orcid.org/0000-0002-5336-1796"},"institutions":[{"id":"https://openalex.org/I76245029","display_name":"Saints Cyril and Methodius University of Skopje","ror":"https://ror.org/02wk2vx54","country_code":"MK","type":"education","lineage":["https://openalex.org/I76245029"]}],"countries":["MK"],"is_corresponding":false,"raw_author_name":"Petre Lameski","raw_affiliation_strings":["Faculty of Computer Science and Engineering, Ss Cyril and Methodius University, Skopje, North Macedonia"],"affiliations":[{"raw_affiliation_string":"Faculty of Computer Science and Engineering, Ss Cyril and Methodius University, Skopje, North Macedonia","institution_ids":["https://openalex.org/I76245029"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018793332","display_name":"Ace Dimitrievski","orcid":"https://orcid.org/0000-0002-2744-0845"},"institutions":[{"id":"https://openalex.org/I76245029","display_name":"Saints Cyril and Methodius University of Skopje","ror":"https://ror.org/02wk2vx54","country_code":"MK","type":"education","lineage":["https://openalex.org/I76245029"]}],"countries":["MK"],"is_corresponding":false,"raw_author_name":"Ace Dimitrievski","raw_affiliation_strings":["Faculty of Computer Science and Engineering, Ss Cyril and Methodius University, Skopje, North Macedonia"],"affiliations":[{"raw_affiliation_string":"Faculty of Computer Science and Engineering, Ss Cyril and Methodius University, Skopje, North Macedonia","institution_ids":["https://openalex.org/I76245029"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089208377","display_name":"Marek Grzegorowski","orcid":"https://orcid.org/0000-0003-4740-0725"},"institutions":[{"id":"https://openalex.org/I4654613","display_name":"University of Warsaw","ror":"https://ror.org/039bjqg32","country_code":"PL","type":"education","lineage":["https://openalex.org/I4654613"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Marek Grzegorowski","raw_affiliation_strings":["Institute of Informatics, University of Warsaw, Poland"],"affiliations":[{"raw_affiliation_string":"Institute of Informatics, University of Warsaw, Poland","institution_ids":["https://openalex.org/I4654613"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5013668860","display_name":"Cas Apanowicz","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cas Apanowicz","raw_affiliation_strings":["CogniTrek Corp., Toronto, Canada"],"affiliations":[{"raw_affiliation_string":"CogniTrek Corp., Toronto, Canada","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5000303065"],"corresponding_institution_ids":["https://openalex.org/I76245029"],"apc_list":null,"apc_paid":null,"fwci":4.8522,"has_fulltext":false,"cited_by_count":26,"citation_normalized_percentile":{"value":0.95079216,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"2019","issue":null,"first_page":"3754","last_page":"3763"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11891","display_name":"Big Data and Business Intelligence","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11891","display_name":"Big Data and Business Intelligence","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.9793999791145325,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8610098361968994},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7544972896575928},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.6368382573127747},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.5354850888252258},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5201409459114075},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.4995267391204834},{"id":"https://openalex.org/keywords/volume","display_name":"Volume (thermodynamics)","score":0.4776269793510437},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.47517910599708557},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4640682339668274},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.4513024091720581},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.43334391713142395},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.430192232131958},{"id":"https://openalex.org/keywords/data-aggregator","display_name":"Data aggregator","score":0.4120684564113617},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.16130822896957397},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.12194794416427612},{"id":"https://openalex.org/keywords/wireless-sensor-network","display_name":"Wireless sensor network","score":0.09331151843070984}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8610098361968994},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7544972896575928},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.6368382573127747},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.5354850888252258},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5201409459114075},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.4995267391204834},{"id":"https://openalex.org/C20556612","wikidata":"https://www.wikidata.org/wiki/Q4469374","display_name":"Volume (thermodynamics)","level":2,"score":0.4776269793510437},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.47517910599708557},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4640682339668274},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.4513024091720581},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.43334391713142395},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.430192232131958},{"id":"https://openalex.org/C82578977","wikidata":"https://www.wikidata.org/wiki/Q16773055","display_name":"Data aggregator","level":3,"score":0.4120684564113617},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.16130822896957397},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.12194794416427612},{"id":"https://openalex.org/C24590314","wikidata":"https://www.wikidata.org/wiki/Q336038","display_name":"Wireless sensor network","level":2,"score":0.09331151843070984},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/bigdata47090.2019.9006547","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata47090.2019.9006547","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},{"id":"mag:3040895243","is_oa":false,"landing_page_url":"https://jglobal.jst.go.jp/en/detail?JGLOBAL_ID=202002221338002053","pdf_url":null,"source":{"id":"https://openalex.org/S4306512817","display_name":"IEEE Conference Proceedings","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":"IEEE Conference Proceedings","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.4300000071525574,"display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W410850256","https://openalex.org/W1615880688","https://openalex.org/W1859707960","https://openalex.org/W1903634653","https://openalex.org/W1945117268","https://openalex.org/W1967091776","https://openalex.org/W1969877208","https://openalex.org/W1984116073","https://openalex.org/W2003584736","https://openalex.org/W2011486541","https://openalex.org/W2034829411","https://openalex.org/W2048493531","https://openalex.org/W2049470338","https://openalex.org/W2092086632","https://openalex.org/W2097484305","https://openalex.org/W2109574129","https://openalex.org/W2129753516","https://openalex.org/W2133160781","https://openalex.org/W2141975087","https://openalex.org/W2149140091","https://openalex.org/W2157954477","https://openalex.org/W2159588611","https://openalex.org/W2163857507","https://openalex.org/W2184112231","https://openalex.org/W2213201020","https://openalex.org/W2304108215","https://openalex.org/W2708426512","https://openalex.org/W2734006851","https://openalex.org/W2735400742","https://openalex.org/W2782634983","https://openalex.org/W2796007311","https://openalex.org/W2963288913","https://openalex.org/W2998428063","https://openalex.org/W4231983080","https://openalex.org/W4388868528","https://openalex.org/W6614148910","https://openalex.org/W6639123046","https://openalex.org/W6740959804","https://openalex.org/W6772190375"],"related_works":["https://openalex.org/W4390608645","https://openalex.org/W4247566972","https://openalex.org/W2960264696","https://openalex.org/W3090563135","https://openalex.org/W2497432351","https://openalex.org/W4206777497","https://openalex.org/W2910064364","https://openalex.org/W4255224757","https://openalex.org/W2499527417","https://openalex.org/W2393741509"],"abstract_inverted_index":{"The":[0,92,177,211],"ability":[1,30],"to":[2,51,126,234],"analyze":[3],"the":[4,17,24,32,65,77,128,140,180,190,200],"available":[5,78],"data":[6,147,202],"is":[7,31,46,124],"a":[8,41,47,55,61,107,114,132,185,205],"valuable":[9],"asset":[10],"for":[11,27,71],"any":[12],"successful":[13],"business,":[14],"especially":[15],"when":[16],"analysis":[18],"yields":[19],"meaningful":[20],"knowledge.":[21],"One":[22],"of":[23,76,179],"key":[25],"processes":[26],"acquiring":[28],"such":[29],"Extract-Transform-Load":[33],"(ETL)":[34],"process.":[35],"For":[36],"Big":[37],"Data,":[38],"ETL":[39,73,109,152,182],"requires":[40],"significant":[42],"effort":[43],"and":[44,83,123,134,165,207],"it":[45,197],"very":[48],"challenging":[49],"task":[50],"be":[52,194],"performed":[53],"in":[54,64,88,184],"cost-effective":[56,72],"way.":[57],"There":[58],"are":[59,80,85,95],"quite":[60],"few":[62],"examples":[63,79],"literature":[66],"that":[67,94,189],"describe":[68],"an":[69],"architecture":[70],"but":[74],"none":[75],"complete":[81],"enough":[82],"they":[84],"usually":[86],"evaluated":[87,139,214],"narrow":[89],"problem":[90],"domains.":[91],"ones":[93],"more":[96],"general,":[97],"require":[98],"specific":[99],"implementation":[100,121],"details.":[101],"In":[102],"this":[103],"paper":[104],"we":[105,112],"propose":[106],"cloud-based":[108],"framework":[110],"where":[111],"use":[113],"general":[115],"cluster-size":[116],"optimization":[117],"algorithm,":[118],"while":[119],"providing":[120],"details,":[122],"able":[125],"perform":[127],"required":[129,201],"job":[130],"within":[131,168,204],"predefined,":[133],"thus":[135],"known,":[136],"time.":[137],"We":[138],"algorithm":[141],"by":[142,220],"executing":[143],"three":[144,181],"scenarios":[145,183],"regarding":[146],"aggregation":[148,157,167],"during":[149],"ETL:":[150],"(i)":[151],"with":[153,225,228],"no":[154],"aggregation;":[155],"(ii)":[156],"based":[158],"on":[159,215],"predefined":[160,206],"columns":[161],"or":[162],"time":[163,175],"intervals;":[164],"(iii)":[166],"single":[169],"user":[170,222],"sessions":[171],"spanning":[172],"over":[173],"arbitrary":[174],"intervals.":[176],"execution":[178],"production":[186],"setting":[187],"showed":[188],"cluster":[191],"size":[192],"could":[193],"optimized":[195],"so":[196],"can":[198],"process":[199],"volume":[203],"thus,":[208],"expected,":[209],"latency.":[210],"scalability":[212],"was":[213],"Amazon":[216],"AWS":[217],"Hadoop":[218],"clusters":[219],"processing":[221],"logs":[223],"collected":[224],"Kinesis":[226],"streams":[227],"datasets":[229],"ranging":[230],"from":[231],"30":[232],"GB":[233],"2.6":[235],"TB.":[236]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":8},{"year":2020,"cited_by_count":2}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
