{"id":"https://openalex.org/W2981171684","doi":"https://doi.org/10.1109/tcc.2019.2947678","title":"Cutting the Unnecessary Long Tail: Cost-Effective Big Data Clustering in the Cloud","display_name":"Cutting the Unnecessary Long Tail: Cost-Effective Big Data Clustering in the Cloud","publication_year":2019,"publication_date":"2019-10-16","ids":{"openalex":"https://openalex.org/W2981171684","doi":"https://doi.org/10.1109/tcc.2019.2947678","mag":"2981171684"},"language":"en","primary_location":{"id":"doi:10.1109/tcc.2019.2947678","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcc.2019.2947678","pdf_url":null,"source":{"id":"https://openalex.org/S2492498579","display_name":"IEEE Transactions on Cloud Computing","issn_l":"2168-7161","issn":["2168-7161","2372-0018"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cloud Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100384479","display_name":"Dongwei Li","orcid":"https://orcid.org/0000-0002-4912-8510"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]},{"id":"https://openalex.org/I57093077","display_name":"Swinburne University of Technology","ror":"https://ror.org/031rekg67","country_code":"AU","type":"education","lineage":["https://openalex.org/I57093077"]}],"countries":["AU","CN"],"is_corresponding":false,"raw_author_name":"Dongwei Li","raw_affiliation_strings":["School of Computer Science, Beijing Institute of Technology, Beijing, Haidian, China","School of Software and Electrical Engineering, Swinburne University of Technology, Hawthorn, VIC, Australia"],"raw_orcid":"https://orcid.org/0000-0002-4912-8510","affiliations":[{"raw_affiliation_string":"School of Computer Science, Beijing Institute of Technology, Beijing, Haidian, China","institution_ids":["https://openalex.org/I125839683"]},{"raw_affiliation_string":"School of Software and Electrical Engineering, Swinburne University of Technology, Hawthorn, VIC, Australia","institution_ids":["https://openalex.org/I57093077"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000423121","display_name":"Shuliang Wang","orcid":"https://orcid.org/0000-0001-5326-7209"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuliang Wang","raw_affiliation_strings":["School of Computer Science, Beijing Institute of Technology, Beijing, Haidian, China"],"raw_orcid":"https://orcid.org/0000-0001-5326-7209","affiliations":[{"raw_affiliation_string":"School of Computer Science, Beijing Institute of Technology, Beijing, Haidian, China","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100599860","display_name":"Nan Gao","orcid":"https://orcid.org/0000-0002-9694-2689"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nan Gao","raw_affiliation_strings":["School of Science, Royal Melbourne Institute of Technology, Melbourne, VIC, Australia"],"raw_orcid":"https://orcid.org/0000-0002-9694-2689","affiliations":[{"raw_affiliation_string":"School of Science, Royal Melbourne Institute of Technology, Melbourne, VIC, Australia","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023499987","display_name":"Qiang He","orcid":"https://orcid.org/0000-0002-2607-4556"},"institutions":[{"id":"https://openalex.org/I57093077","display_name":"Swinburne University of Technology","ror":"https://ror.org/031rekg67","country_code":"AU","type":"education","lineage":["https://openalex.org/I57093077"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Qiang He","raw_affiliation_strings":["School of Software and Electrical Engineering, Swinburne University of Technology, Hawthorn, VIC, Australia"],"raw_orcid":"https://orcid.org/0000-0002-2607-4556","affiliations":[{"raw_affiliation_string":"School of Software and Electrical Engineering, Swinburne University of Technology, Hawthorn, VIC, Australia","institution_ids":["https://openalex.org/I57093077"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5035343733","display_name":"Yun Yang","orcid":"https://orcid.org/0000-0002-7868-5471"},"institutions":[{"id":"https://openalex.org/I57093077","display_name":"Swinburne University of Technology","ror":"https://ror.org/031rekg67","country_code":"AU","type":"education","lineage":["https://openalex.org/I57093077"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Yun Yang","raw_affiliation_strings":["School of Software and Electrical Engineering, Swinburne University of Technology, Hawthorn, VIC, Australia"],"raw_orcid":"https://orcid.org/0000-0002-7868-5471","affiliations":[{"raw_affiliation_string":"School of Software and Electrical Engineering, Swinburne University of Technology, Hawthorn, VIC, Australia","institution_ids":["https://openalex.org/I57093077"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.2892,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.67526322,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"10","issue":"1","first_page":"292","last_page":"303"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10637","display_name":"Advanced Clustering Algorithms Research","score":0.9944000244140625,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9900000095367432,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.835617184638977},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.7672330141067505},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7650234699249268},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.633334755897522},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.6026458740234375},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5551947951316833},{"id":"https://openalex.org/keywords/cure-data-clustering-algorithm","display_name":"CURE data clustering algorithm","score":0.5045081377029419},{"id":"https://openalex.org/keywords/canopy-clustering-algorithm","display_name":"Canopy clustering algorithm","score":0.5},{"id":"https://openalex.org/keywords/fuzzy-clustering","display_name":"Fuzzy clustering","score":0.49619966745376587},{"id":"https://openalex.org/keywords/data-stream-clustering","display_name":"Data stream clustering","score":0.4645850956439972},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4632858335971832},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.45758217573165894},{"id":"https://openalex.org/keywords/correlation-clustering","display_name":"Correlation clustering","score":0.4213590621948242},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.33735233545303345},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3007923364639282}],"concepts":[{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.835617184638977},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.7672330141067505},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7650234699249268},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.633334755897522},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.6026458740234375},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5551947951316833},{"id":"https://openalex.org/C33704608","wikidata":"https://www.wikidata.org/wiki/Q5014717","display_name":"CURE data clustering algorithm","level":4,"score":0.5045081377029419},{"id":"https://openalex.org/C104047586","wikidata":"https://www.wikidata.org/wiki/Q5033439","display_name":"Canopy clustering algorithm","level":4,"score":0.5},{"id":"https://openalex.org/C17212007","wikidata":"https://www.wikidata.org/wiki/Q5511111","display_name":"Fuzzy clustering","level":3,"score":0.49619966745376587},{"id":"https://openalex.org/C193143536","wikidata":"https://www.wikidata.org/wiki/Q5227360","display_name":"Data stream clustering","level":5,"score":0.4645850956439972},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4632858335971832},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.45758217573165894},{"id":"https://openalex.org/C94641424","wikidata":"https://www.wikidata.org/wiki/Q5172845","display_name":"Correlation clustering","level":3,"score":0.4213590621948242},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33735233545303345},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3007923364639282},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tcc.2019.2947678","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcc.2019.2947678","pdf_url":null,"source":{"id":"https://openalex.org/S2492498579","display_name":"IEEE Transactions on Cloud Computing","issn_l":"2168-7161","issn":["2168-7161","2372-0018"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cloud Computing","raw_type":"journal-article"},{"id":"pmh:oai:researchbank.swinburne.edu.au:1fcc7a41-b37f-4b7f-b657-b2e2b49e0f08/1","is_oa":false,"landing_page_url":"http://hdl.handle.net/1959.3/452464","pdf_url":null,"source":{"id":"https://openalex.org/S4306401157","display_name":"Swinburne Research Bank (Swinburne University of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I57093077","host_organization_name":"Swinburne University of Technology","host_organization_lineage":["https://openalex.org/I57093077"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Transactions on Cloud Computing (2019), pp. 1-1","raw_type":""}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/15","score":0.41999998688697815,"display_name":"Life in Land"}],"awards":[{"id":"https://openalex.org/G4324613428","display_name":null,"funder_award_id":"2016YFC0803000","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G6365085709","display_name":null,"funder_award_id":"2016YFB0502604","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320322725","display_name":"China Scholarship Council","ror":"https://ror.org/04atp4p48"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W1536340909","https://openalex.org/W1536713914","https://openalex.org/W1599740911","https://openalex.org/W2012759654","https://openalex.org/W2016159616","https://openalex.org/W2023954349","https://openalex.org/W2038249652","https://openalex.org/W2049633694","https://openalex.org/W2053742104","https://openalex.org/W2057923756","https://openalex.org/W2073849744","https://openalex.org/W2091276705","https://openalex.org/W2095483845","https://openalex.org/W2098533231","https://openalex.org/W2114296561","https://openalex.org/W2120652260","https://openalex.org/W2128525985","https://openalex.org/W2140405352","https://openalex.org/W2142458924","https://openalex.org/W2142827986","https://openalex.org/W2211843587","https://openalex.org/W2260244612","https://openalex.org/W2410213821","https://openalex.org/W2413245730","https://openalex.org/W2502759836","https://openalex.org/W2515866431","https://openalex.org/W2517202832","https://openalex.org/W2526211369","https://openalex.org/W2572048922","https://openalex.org/W2592962403","https://openalex.org/W2596781872","https://openalex.org/W2737995575","https://openalex.org/W2762329652","https://openalex.org/W2769557912","https://openalex.org/W2772816029","https://openalex.org/W2889833684","https://openalex.org/W2900036527","https://openalex.org/W2955962558","https://openalex.org/W3105577662","https://openalex.org/W4214564766","https://openalex.org/W4239328607","https://openalex.org/W4244030505","https://openalex.org/W4299837303","https://openalex.org/W6636657728","https://openalex.org/W6678914141"],"related_works":["https://openalex.org/W2892323093","https://openalex.org/W2117838073","https://openalex.org/W2361242132","https://openalex.org/W3071522575","https://openalex.org/W3140018618","https://openalex.org/W2353443653","https://openalex.org/W2374506950","https://openalex.org/W2556490192","https://openalex.org/W2596632494","https://openalex.org/W4312412183"],"abstract_inverted_index":{"Clustering":[0],"big":[1,45,104],"data":[2,46,105,147],"often":[3,57],"requires":[4],"tremendous":[5],"computational":[6],"resources":[7],"where":[8],"cloud":[9,24,165],"computing":[10],"is":[11,56,99,139],"undoubtedly":[12],"one":[13],"of":[14,54,195,215],"the":[15,19,23,44,52,60,66,76,81,91,108,112,116,136,150,164,173,177,196,207,216,225,240],"promising":[16],"solutions.":[17],"However,":[18],"computation":[20,94,197,217],"cost":[21,198],"in":[22,43,59,65,80,107,163,172,224,242],"can":[25,120,159,234],"be":[26,32],"unexpectedly":[27],"high":[28,161],"if":[29],"it":[30],"cannot":[31],"managed":[33],"properly.":[34],"The":[35],"long":[36,78],"tail":[37,79],"phenomenon":[38],"has":[39],"been":[40],"observed":[41],"widely":[42,122],"clustering":[47,67,82,106],"area,":[48],"which":[49],"indicates":[50],"that":[51,153,185,221],"majority":[53],"time":[55],"consumed":[58],"middle":[61],"to":[62,74,84,101,237],"late":[63],"stages":[64],"process.":[68],"In":[69],"this":[70],"research,":[71],"we":[72,119,183],"try":[73],"cut":[75],"unnecessary":[77],"process":[83],"achieve":[85,102,160],"a":[86,187,202],"sufficiently":[87],"satisfactory":[88],"accuracy":[89,138,190,205],"at":[90,131],"lowest":[92],"possible":[93],"cost.":[95,218],"A":[96],"novel":[97],"approach":[98,233],"proposed":[100,168],"cost-effective":[103],"cloud.":[109],"By":[110],"training":[111],"regression":[113],"model":[114],"with":[115,166,176],"sampling":[117],"data,":[118],"make":[121],"used":[123],"k-means":[124,155,181],"and":[125,149,156],"EM":[126,157,210],"(Expectation-Maximization)":[127],"algorithms":[128,158],"stop":[129],"automatically":[130],"an":[132],"early":[133],"point":[134],"when":[135],"desired":[137],"obtained.":[140],"Experiments":[141],"are":[142],"conducted":[143],"on":[144],"four":[145],"popular":[146],"sets":[148],"results":[151],"demonstrate":[152],"both":[154],"cost-effectiveness":[162],"our":[167,232],"approach.":[169],"For":[170],"example,":[171,231],"case":[174],"studies":[175],"much":[178],"more":[179],"efficient":[180,209],"algorithm,":[182],"find":[184],"achieving":[186,201],"99":[188],"percent":[189,194,204,214],"needs":[191,212],"only":[192],"47.71-71.14":[193],"required":[199],"for":[200,239],"100":[203],"while":[206],"less":[208],"algorithm":[211],"16.69-32.04":[213],"To":[219],"put":[220],"into":[222],"perspective,":[223],"United":[226],"States":[227],"land":[228],"use":[229],"classification":[230],"save":[235],"up":[236],"$94,687.49":[238],"government":[241],"each":[243],"use.":[244]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
