{"id":"https://openalex.org/W3008824548","doi":"https://doi.org/10.1109/bigdata47090.2019.9006065","title":"High Dimensional Data Clustering by means of Distributed Dirichlet Process Mixture Models","display_name":"High Dimensional Data Clustering by means of Distributed Dirichlet Process Mixture Models","publication_year":2019,"publication_date":"2019-12-01","ids":{"openalex":"https://openalex.org/W3008824548","doi":"https://doi.org/10.1109/bigdata47090.2019.9006065","mag":"3008824548"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata47090.2019.9006065","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata47090.2019.9006065","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://hal-lirmm.ccsd.cnrs.fr/lirmm-02364411","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063835593","display_name":"Khadidja Meguelati","orcid":null},"institutions":[{"id":"https://openalex.org/I19894307","display_name":"Universit\u00e9 de Montpellier","ror":"https://ror.org/051escj72","country_code":"FR","type":"education","lineage":["https://openalex.org/I19894307"]},{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I4210101743","display_name":"Laboratoire d'Informatique, de Robotique et de Micro\u00e9lectronique de Montpellier","ror":"https://ror.org/013yean28","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I1326498283","https://openalex.org/I151295451","https://openalex.org/I19894307","https://openalex.org/I4210101743","https://openalex.org/I4210159245","https://openalex.org/I4405261681"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Khadidja Meguelati","raw_affiliation_strings":["Inria, LIRMM, Univ Montpellier, CNRS. Montpellier, France"],"affiliations":[{"raw_affiliation_string":"Inria, LIRMM, Univ Montpellier, CNRS. Montpellier, France","institution_ids":["https://openalex.org/I4210101743","https://openalex.org/I19894307","https://openalex.org/I1294671590"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043861020","display_name":"B\u00e9n\u00e9dicte Fontez","orcid":"https://orcid.org/0000-0002-2237-0489"},"institutions":[{"id":"https://openalex.org/I4210117045","display_name":"Math\u00e9matiques, Informatique et Statistique pour l'Environnement et l'Agronomie","ror":"https://ror.org/01pd2sz18","country_code":"FR","type":"facility","lineage":["https://openalex.org/I24906876","https://openalex.org/I4210088668","https://openalex.org/I4210088668","https://openalex.org/I4210102957","https://openalex.org/I4210117045","https://openalex.org/I4210117091","https://openalex.org/I4210147367","https://openalex.org/I4399657933"]},{"id":"https://openalex.org/I19894307","display_name":"Universit\u00e9 de Montpellier","ror":"https://ror.org/051escj72","country_code":"FR","type":"education","lineage":["https://openalex.org/I19894307"]},{"id":"https://openalex.org/I24906876","display_name":"Institut Agro Montpellier","ror":"https://ror.org/03rnk6m14","country_code":"FR","type":"education","lineage":["https://openalex.org/I24906876","https://openalex.org/I4210117091","https://openalex.org/I4399657933"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Benedicte Fontez","raw_affiliation_strings":["MISTEA, Montpellier SupAgro, INRA, Univ Montpellier, Montpellier, France"],"affiliations":[{"raw_affiliation_string":"MISTEA, Montpellier SupAgro, INRA, Univ Montpellier, Montpellier, France","institution_ids":["https://openalex.org/I24906876","https://openalex.org/I19894307","https://openalex.org/I4210117045"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037758902","display_name":"Nadine Hilgert","orcid":null},"institutions":[{"id":"https://openalex.org/I19894307","display_name":"Universit\u00e9 de Montpellier","ror":"https://ror.org/051escj72","country_code":"FR","type":"education","lineage":["https://openalex.org/I19894307"]},{"id":"https://openalex.org/I4210117045","display_name":"Math\u00e9matiques, Informatique et Statistique pour l'Environnement et l'Agronomie","ror":"https://ror.org/01pd2sz18","country_code":"FR","type":"facility","lineage":["https://openalex.org/I24906876","https://openalex.org/I4210088668","https://openalex.org/I4210088668","https://openalex.org/I4210102957","https://openalex.org/I4210117045","https://openalex.org/I4210117091","https://openalex.org/I4210147367","https://openalex.org/I4399657933"]},{"id":"https://openalex.org/I24906876","display_name":"Institut Agro Montpellier","ror":"https://ror.org/03rnk6m14","country_code":"FR","type":"education","lineage":["https://openalex.org/I24906876","https://openalex.org/I4210117091","https://openalex.org/I4399657933"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Nadine Hilgert","raw_affiliation_strings":["MISTEA, Montpellier SupAgro, INRA, Univ Montpellier, Montpellier, France"],"affiliations":[{"raw_affiliation_string":"MISTEA, Montpellier SupAgro, INRA, Univ Montpellier, Montpellier, France","institution_ids":["https://openalex.org/I24906876","https://openalex.org/I19894307","https://openalex.org/I4210117045"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5045185842","display_name":"Florent Mass\u00e9glia","orcid":"https://orcid.org/0000-0002-1149-585X"},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I19894307","display_name":"Universit\u00e9 de Montpellier","ror":"https://ror.org/051escj72","country_code":"FR","type":"education","lineage":["https://openalex.org/I19894307"]},{"id":"https://openalex.org/I4210101743","display_name":"Laboratoire d'Informatique, de Robotique et de Micro\u00e9lectronique de Montpellier","ror":"https://ror.org/013yean28","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I1326498283","https://openalex.org/I151295451","https://openalex.org/I19894307","https://openalex.org/I4210101743","https://openalex.org/I4210159245","https://openalex.org/I4405261681"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Florent Masseglia","raw_affiliation_strings":["Inria, LIRMM, Univ Montpellier, CNRS. Montpellier, France"],"affiliations":[{"raw_affiliation_string":"Inria, LIRMM, Univ Montpellier, CNRS. Montpellier, France","institution_ids":["https://openalex.org/I4210101743","https://openalex.org/I19894307","https://openalex.org/I1294671590"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5063835593"],"corresponding_institution_ids":["https://openalex.org/I1294671590","https://openalex.org/I19894307","https://openalex.org/I4210101743"],"apc_list":null,"apc_paid":null,"fwci":0.289,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.69056725,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"890","last_page":"899"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11901","display_name":"Bayesian Methods and Mixture Models","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11901","display_name":"Bayesian Methods and Mixture Models","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12205","display_name":"Time Series Analysis and Forecasting","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10637","display_name":"Advanced Clustering Algorithms Research","score":0.9909999966621399,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.8230193853378296},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7233266234397888},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.6454765796661377},{"id":"https://openalex.org/keywords/clustering-high-dimensional-data","display_name":"Clustering high-dimensional data","score":0.6205176115036011},{"id":"https://openalex.org/keywords/curse-of-dimensionality","display_name":"Curse of dimensionality","score":0.5626665353775024},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.44611480832099915},{"id":"https://openalex.org/keywords/canopy-clustering-algorithm","display_name":"Canopy clustering algorithm","score":0.41131430864334106},{"id":"https://openalex.org/keywords/correlation-clustering","display_name":"Correlation clustering","score":0.378561407327652},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.22368231415748596}],"concepts":[{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.8230193853378296},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7233266234397888},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.6454765796661377},{"id":"https://openalex.org/C184509293","wikidata":"https://www.wikidata.org/wiki/Q5136711","display_name":"Clustering high-dimensional data","level":3,"score":0.6205176115036011},{"id":"https://openalex.org/C111030470","wikidata":"https://www.wikidata.org/wiki/Q1430460","display_name":"Curse of dimensionality","level":2,"score":0.5626665353775024},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.44611480832099915},{"id":"https://openalex.org/C104047586","wikidata":"https://www.wikidata.org/wiki/Q5033439","display_name":"Canopy clustering algorithm","level":4,"score":0.41131430864334106},{"id":"https://openalex.org/C94641424","wikidata":"https://www.wikidata.org/wiki/Q5172845","display_name":"Correlation clustering","level":3,"score":0.378561407327652},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.22368231415748596},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/bigdata47090.2019.9006065","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata47090.2019.9006065","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},{"id":"pmh:oai:HAL:lirmm-02364411v1","is_oa":true,"landing_page_url":"https://hal-lirmm.ccsd.cnrs.fr/lirmm-02364411","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://bigdataieee.org/BigData2019/","raw_type":"Conference papers"}],"best_oa_location":{"id":"pmh:oai:HAL:lirmm-02364411v1","is_oa":true,"landing_page_url":"https://hal-lirmm.ccsd.cnrs.fr/lirmm-02364411","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://bigdataieee.org/BigData2019/","raw_type":"Conference papers"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G174206280","display_name":null,"funder_award_id":"732051","funder_id":"https://openalex.org/F4320332999","funder_display_name":"Horizon 2020 Framework Programme"},{"id":"https://openalex.org/G3335229504","display_name":null,"funder_award_id":"Avenir","funder_id":"https://openalex.org/F4320320883","funder_display_name":"Agence Nationale de la Recherche"},{"id":"https://openalex.org/G3514550006","display_name":null,"funder_award_id":"Centre","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G3677188838","display_name":null,"funder_award_id":"Horizon 2020 Framework Programme for Research","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G4110391279","display_name":null,"funder_award_id":"project,","funder_id":"https://openalex.org/F4320320883","funder_display_name":"Agence Nationale de la Recherche"},{"id":"https://openalex.org/G5468070371","display_name":null,"funder_award_id":"ANR-11-INBS-0012","funder_id":"https://openalex.org/F4320320883","funder_display_name":"Agence Nationale de la Recherche"},{"id":"https://openalex.org/G5802621812","display_name":null,"funder_award_id":"ANR-11-INBS","funder_id":"https://openalex.org/F4320320883","funder_display_name":"Agence Nationale de la Recherche"},{"id":"https://openalex.org/G7121953570","display_name":"European Cloud In-Memory Database Appliance with Predictable Performance for Critical Applications","funder_award_id":"732051","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G8318064016","display_name":null,"funder_award_id":"Horizon","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G836972847","display_name":null,"funder_award_id":"11-INBS-0012","funder_id":"https://openalex.org/F4320320883","funder_display_name":"Agence Nationale de la Recherche"}],"funders":[{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320320883","display_name":"Agence Nationale de la Recherche","ror":"https://ror.org/00rbzpz17"},{"id":"https://openalex.org/F4320332999","display_name":"Horizon 2020 Framework Programme","ror":"https://ror.org/00k4n6c32"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":57,"referenced_works":["https://openalex.org/W197646968","https://openalex.org/W410850256","https://openalex.org/W1528907134","https://openalex.org/W1565176583","https://openalex.org/W1578929855","https://openalex.org/W1746819321","https://openalex.org/W1894414046","https://openalex.org/W1925821068","https://openalex.org/W1990881776","https://openalex.org/W1992961908","https://openalex.org/W1994005439","https://openalex.org/W2014831690","https://openalex.org/W2049633694","https://openalex.org/W2063202299","https://openalex.org/W2079501320","https://openalex.org/W2080972498","https://openalex.org/W2081935272","https://openalex.org/W2091797506","https://openalex.org/W2097490293","https://openalex.org/W2100231460","https://openalex.org/W2125858773","https://openalex.org/W2127218421","https://openalex.org/W2137130182","https://openalex.org/W2144151128","https://openalex.org/W2150097763","https://openalex.org/W2151681302","https://openalex.org/W2158865579","https://openalex.org/W2162833336","https://openalex.org/W2164274563","https://openalex.org/W2173213060","https://openalex.org/W2189465200","https://openalex.org/W2298278275","https://openalex.org/W2323688730","https://openalex.org/W2403744328","https://openalex.org/W2487770199","https://openalex.org/W2604269166","https://openalex.org/W2741076822","https://openalex.org/W2747313270","https://openalex.org/W2781626303","https://openalex.org/W2935808950","https://openalex.org/W2963288913","https://openalex.org/W3099427829","https://openalex.org/W3102882558","https://openalex.org/W3106407256","https://openalex.org/W3169372665","https://openalex.org/W4211049957","https://openalex.org/W4248679889","https://openalex.org/W6607985459","https://openalex.org/W6614148910","https://openalex.org/W6629804754","https://openalex.org/W6633943007","https://openalex.org/W6640062739","https://openalex.org/W6678914141","https://openalex.org/W6684050148","https://openalex.org/W6687322159","https://openalex.org/W6723168715","https://openalex.org/W6747400276"],"related_works":["https://openalex.org/W3144143113","https://openalex.org/W3022637481","https://openalex.org/W3120229345","https://openalex.org/W3039964395","https://openalex.org/W3088133960","https://openalex.org/W2371010743","https://openalex.org/W2607902515","https://openalex.org/W2393707058","https://openalex.org/W2170450904","https://openalex.org/W2065539689"],"abstract_inverted_index":{"Clustering":[0],"is":[1,30],"a":[2,31,105,144,151],"data":[3,9,138,149],"mining":[4],"technique":[5],"intensively":[6],"used":[7,33],"for":[8,34],"analytics,":[10],"with":[11,37,64,92],"applications":[12],"to":[13,122],"marketing,":[14],"security,":[15],"text/document":[16],"analysis,":[17],"or":[18],"sciences":[19],"like":[20],"biology,":[21],"astronomy,":[22],"and":[23,47,66,85,161],"many":[24],"more.":[25],"Dirichlet":[26,103],"Process":[27],"Mixture":[28],"(DPM)":[29],"model":[32],"multivariate":[35],"clustering":[36,107,134],"the":[38,42,53,75,111,166],"advantage":[39],"of":[40,44,55,71,77,113,135,146,153,169],"discovering":[41],"number":[43],"clusters":[45],"automatically":[46],"offering":[48],"favorable":[49],"characteristics.":[50],"However,":[51],"in":[52],"case":[54],"high":[56,93,136,167],"dimensional":[57,94,137],"data,":[58,164],"it":[59,119,132],"becomes":[60],"an":[61],"important":[62],"challenge":[63],"numerical":[65],"theoretical":[67],"pitfalls.":[68],"The":[69],"advantages":[70],"DPM":[72,88],"come":[73],"at":[74],"price":[76],"prohibitive":[78],"running":[79],"times,":[80],"which":[81],"impair":[82],"its":[83],"adoption":[84],"makes":[86],"centralized":[87],"approaches":[89],"inefficient,":[90],"especially":[91],"data.":[95],"We":[96],"propose":[97],"HD4C":[98],"(High":[99],"Dimensional":[100],"Data":[101],"Distributed":[102],"Clustering),":[104],"parallel":[106],"solution":[108],"that":[109],"addresses":[110],"curse":[112],"dimensionality":[114],"by":[115,125],"two":[116],"means.":[117],"First":[118],"gracefully":[120],"scales":[121],"massive":[123],"datasets":[124],"distributed":[126],"computing,":[127],"while":[128],"remaining":[129],"DPM-compliant.":[130],"Second,":[131],"performs":[133],"such":[139],"as":[140],"time":[141],"series":[142],"(as":[143,150],"function":[145,152],"time),":[147],"hyperspectral":[148],"wavelength)":[154],"etc.":[155],"Our":[156],"experiments,":[157],"on":[158],"both":[159],"synthetic":[160],"real":[162],"world":[163],"illustrate":[165],"performance":[168],"our":[170],"approach.":[171]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2012,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
