{"id":"https://openalex.org/W2935808950","doi":"https://doi.org/10.1145/3297280.3297327","title":"Dirichlet process mixture models made scalable and effective by means of massive distribution","display_name":"Dirichlet process mixture models made scalable and effective by means of massive distribution","publication_year":2019,"publication_date":"2019-04-08","ids":{"openalex":"https://openalex.org/W2935808950","doi":"https://doi.org/10.1145/3297280.3297327","mag":"2935808950"},"language":"en","primary_location":{"id":"doi:10.1145/3297280.3297327","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3297280.3297327","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 34th ACM/SIGAPP Symposium on Applied Computing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://hal.science/hal-01999453","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063835593","display_name":"Khadidja Meguelati","orcid":null},"institutions":[{"id":"https://openalex.org/I4210163794","display_name":"Data Management (Italy)","ror":"https://ror.org/017j5za44","country_code":"IT","type":"company","lineage":["https://openalex.org/I4210163794"]},{"id":"https://openalex.org/I205747304","display_name":"Institut National Polytechnique de Toulouse","ror":"https://ror.org/033p9g875","country_code":"FR","type":"education","lineage":["https://openalex.org/I205747304","https://openalex.org/I4405258862"]},{"id":"https://openalex.org/I4210101743","display_name":"Laboratoire d'Informatique, de Robotique et de Micro\u00e9lectronique de Montpellier","ror":"https://ror.org/013yean28","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I1326498283","https://openalex.org/I151295451","https://openalex.org/I19894307","https://openalex.org/I4210101743","https://openalex.org/I4210159245","https://openalex.org/I4405261681"]}],"countries":["FR","IT"],"is_corresponding":true,"raw_author_name":"Khadidja Meguelati","raw_affiliation_strings":["Inria and LIRMM, Montpellier, France","ZENITH - Scientific Data Management (LIRMM, 161 rue Ada, 34000 Montpellier - France)"],"affiliations":[{"raw_affiliation_string":"Inria and LIRMM, Montpellier, France","institution_ids":["https://openalex.org/I4210101743"]},{"raw_affiliation_string":"ZENITH - Scientific Data Management (LIRMM, 161 rue Ada, 34000 Montpellier - France)","institution_ids":["https://openalex.org/I205747304","https://openalex.org/I4210163794","https://openalex.org/I4210101743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043861020","display_name":"B\u00e9n\u00e9dicte Fontez","orcid":"https://orcid.org/0000-0002-2237-0489"},"institutions":[{"id":"https://openalex.org/I19894307","display_name":"Universit\u00e9 de Montpellier","ror":"https://ror.org/051escj72","country_code":"FR","type":"education","lineage":["https://openalex.org/I19894307"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Benedicte Fontez","raw_affiliation_strings":["Univ.Montpellier, Montpellier, France"],"affiliations":[{"raw_affiliation_string":"Univ.Montpellier, Montpellier, France","institution_ids":["https://openalex.org/I19894307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037758902","display_name":"Nadine Hilgert","orcid":null},"institutions":[{"id":"https://openalex.org/I19894307","display_name":"Universit\u00e9 de Montpellier","ror":"https://ror.org/051escj72","country_code":"FR","type":"education","lineage":["https://openalex.org/I19894307"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Nadine Hilgert","raw_affiliation_strings":["Univ.Montpellier, Montpellier, France"],"affiliations":[{"raw_affiliation_string":"Univ.Montpellier, Montpellier, France","institution_ids":["https://openalex.org/I19894307"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5045185842","display_name":"Florent Mass\u00e9glia","orcid":"https://orcid.org/0000-0002-1149-585X"},"institutions":[{"id":"https://openalex.org/I205747304","display_name":"Institut National Polytechnique de Toulouse","ror":"https://ror.org/033p9g875","country_code":"FR","type":"education","lineage":["https://openalex.org/I205747304","https://openalex.org/I4405258862"]},{"id":"https://openalex.org/I4210101743","display_name":"Laboratoire d'Informatique, de Robotique et de Micro\u00e9lectronique de Montpellier","ror":"https://ror.org/013yean28","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I1326498283","https://openalex.org/I151295451","https://openalex.org/I19894307","https://openalex.org/I4210101743","https://openalex.org/I4210159245","https://openalex.org/I4405261681"]},{"id":"https://openalex.org/I4210163794","display_name":"Data Management (Italy)","ror":"https://ror.org/017j5za44","country_code":"IT","type":"company","lineage":["https://openalex.org/I4210163794"]}],"countries":["FR","IT"],"is_corresponding":false,"raw_author_name":"Florent Masseglia","raw_affiliation_strings":["Inria and LIRMM, Montpellier, France","ZENITH - Scientific Data Management (LIRMM, 161 rue Ada, 34000 Montpellier - France)"],"affiliations":[{"raw_affiliation_string":"Inria and LIRMM, Montpellier, France","institution_ids":["https://openalex.org/I4210101743"]},{"raw_affiliation_string":"ZENITH - Scientific Data Management (LIRMM, 161 rue Ada, 34000 Montpellier - France)","institution_ids":["https://openalex.org/I205747304","https://openalex.org/I4210163794","https://openalex.org/I4210101743"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5063835593"],"corresponding_institution_ids":["https://openalex.org/I205747304","https://openalex.org/I4210101743","https://openalex.org/I4210163794"],"apc_list":null,"apc_paid":null,"fwci":0.5772,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.74902288,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"502","last_page":"509"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11901","display_name":"Bayesian Methods and Mixture Models","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11901","display_name":"Bayesian Methods and Mixture Models","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10637","display_name":"Advanced Clustering Algorithms Research","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10136","display_name":"Statistical Methods and Inference","score":0.9592999815940857,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.8354865312576294},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7663508653640747},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7304421663284302},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.6125163435935974},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.570245087146759},{"id":"https://openalex.org/keywords/dirichlet-process","display_name":"Dirichlet process","score":0.48316115140914917},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4799899756908417},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.4652702808380127},{"id":"https://openalex.org/keywords/limit","display_name":"Limit (mathematics)","score":0.4648042917251587},{"id":"https://openalex.org/keywords/latent-dirichlet-allocation","display_name":"Latent Dirichlet allocation","score":0.4626394808292389},{"id":"https://openalex.org/keywords/data-point","display_name":"Data point","score":0.44963476061820984},{"id":"https://openalex.org/keywords/dirichlet-distribution","display_name":"Dirichlet distribution","score":0.42496445775032043},{"id":"https://openalex.org/keywords/topic-model","display_name":"Topic model","score":0.2662205100059509},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.24572381377220154},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.18943190574645996},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.18522575497627258},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.12796801328659058},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.12184295058250427}],"concepts":[{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.8354865312576294},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7663508653640747},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7304421663284302},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.6125163435935974},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.570245087146759},{"id":"https://openalex.org/C2781280628","wikidata":"https://www.wikidata.org/wiki/Q5280766","display_name":"Dirichlet process","level":3,"score":0.48316115140914917},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4799899756908417},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.4652702808380127},{"id":"https://openalex.org/C151201525","wikidata":"https://www.wikidata.org/wiki/Q177239","display_name":"Limit (mathematics)","level":2,"score":0.4648042917251587},{"id":"https://openalex.org/C500882744","wikidata":"https://www.wikidata.org/wiki/Q269236","display_name":"Latent Dirichlet allocation","level":3,"score":0.4626394808292389},{"id":"https://openalex.org/C21080849","wikidata":"https://www.wikidata.org/wiki/Q13611879","display_name":"Data point","level":2,"score":0.44963476061820984},{"id":"https://openalex.org/C169214877","wikidata":"https://www.wikidata.org/wiki/Q981016","display_name":"Dirichlet distribution","level":3,"score":0.42496445775032043},{"id":"https://openalex.org/C171686336","wikidata":"https://www.wikidata.org/wiki/Q3532085","display_name":"Topic model","level":2,"score":0.2662205100059509},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.24572381377220154},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.18943190574645996},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.18522575497627258},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.12796801328659058},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.12184295058250427},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C182310444","wikidata":"https://www.wikidata.org/wiki/Q1332643","display_name":"Boundary value problem","level":2,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1145/3297280.3297327","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3297280.3297327","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 34th ACM/SIGAPP Symposium on Applied Computing","raw_type":"proceedings-article"},{"id":"pmh:oai:HAL:hal-01999453v1","is_oa":true,"landing_page_url":"https://hal.science/hal-01999453","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://www.sigapp.org/sac/sac2019/","raw_type":"Conference papers"},{"id":"pmh:oai:HAL:lirmm-03036914v1","is_oa":false,"landing_page_url":"https://hal-lirmm.ccsd.cnrs.fr/lirmm-03036914","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"34th ACM/SIGAPP Symposium on Applied Computing (SAC)","raw_type":"info:eu-repo/semantics/conferenceObject"},{"id":"pmh:oai:prodinra.inra.fr:470730","is_oa":false,"landing_page_url":"http://prodinra.inra.fr/ft/B2391A42-A17B-4067-BA7B-C7F0E2F001F6","pdf_url":null,"source":{"id":"https://openalex.org/S4306400794","display_name":"Prodinra (INRA Bordeaux-Aquitaine)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210088668","host_organization_name":"Institut National de Recherche pour l'Agriculture, l'Alimentation et l'Environnement","host_organization_lineage":["https://openalex.org/I4210088668"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"SAC '19: Proceedings of the 34th ACM/SIGAPP symposium on applied computing. 2019; 34. ACM/SIGAPP Symposium On Applied Computing : SAC 2019, Limassol, CYP, 2019-04-08-2019-04-12, 502-509","raw_type":"PROCEEDING_PAPER"}],"best_oa_location":{"id":"pmh:oai:HAL:hal-01999453v1","is_oa":true,"landing_page_url":"https://hal.science/hal-01999453","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://www.sigapp.org/sac/sac2019/","raw_type":"Conference papers"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/17","score":0.4000000059604645,"display_name":"Partnerships for the goals"}],"awards":[{"id":"https://openalex.org/G174206280","display_name":null,"funder_award_id":"732051","funder_id":"https://openalex.org/F4320332999","funder_display_name":"Horizon 2020 Framework Programme"},{"id":"https://openalex.org/G3677188838","display_name":null,"funder_award_id":"Horizon 2020 Framework Programme for Research","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G7121953570","display_name":"European Cloud In-Memory Database Appliance with Predictable Performance for Critical Applications","funder_award_id":"732051","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G8318064016","display_name":null,"funder_award_id":"Horizon","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"}],"funders":[{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320332999","display_name":"Horizon 2020 Framework Programme","ror":"https://ror.org/00k4n6c32"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W57376053","https://openalex.org/W1503404899","https://openalex.org/W1864426711","https://openalex.org/W1967687583","https://openalex.org/W2032510388","https://openalex.org/W2034616054","https://openalex.org/W2035756456","https://openalex.org/W2049633694","https://openalex.org/W2079501320","https://openalex.org/W2080972498","https://openalex.org/W2081935272","https://openalex.org/W2091797506","https://openalex.org/W2100231460","https://openalex.org/W2116137244","https://openalex.org/W2125858773","https://openalex.org/W2137130182","https://openalex.org/W2151967501","https://openalex.org/W2158865579","https://openalex.org/W2162833336","https://openalex.org/W2173213060","https://openalex.org/W2189465200","https://openalex.org/W2395355800","https://openalex.org/W2747313270","https://openalex.org/W3003241580","https://openalex.org/W3102882558","https://openalex.org/W3106407256","https://openalex.org/W4241572446"],"related_works":["https://openalex.org/W2891616219","https://openalex.org/W2497860580","https://openalex.org/W153004028","https://openalex.org/W1902157726","https://openalex.org/W3204672119","https://openalex.org/W2393894849","https://openalex.org/W197558907","https://openalex.org/W2172796364","https://openalex.org/W3122449418","https://openalex.org/W3121467232"],"abstract_inverted_index":{"Clustering":[0],"with":[1,21],"accurate":[2],"results":[3],"have":[4],"become":[5],"a":[6,16,70],"topic":[7],"of":[8,24,28,53,79,90,107,112],"high":[9,105],"interest.":[10],"Dirichlet":[11],"Process":[12],"Mixture":[13],"(DPM)":[14],"is":[15,87],"model":[17],"used":[18],"for":[19],"clustering":[20,72],"the":[22,26,41,45,51,88,104],"advantage":[23],"discovering":[25],"number":[27],"clusters":[29,43],"automatically":[30],"and":[31,61,99,121],"offering":[32],"nice":[33],"properties":[34],"like,":[35],"e.g.,":[36],"its":[37,59,123],"potential":[38],"convergence":[39],"to":[40,77],"actual":[42],"in":[44],"data.":[46],"These":[47],"advantages":[48],"come":[49],"at":[50],"price":[52],"prohibitive":[54],"response":[55],"times,":[56],"which":[57,86],"impairs":[58],"adoption":[60],"makes":[62],"centralized":[63,116],"DPM":[64,84],"approaches":[65],"inefficient.":[66],"We":[67],"propose":[68],"DC-DPM,":[69],"parallel":[71],"solution":[73],"that":[74],"gracefully":[75],"scales":[76],"millions":[78,111],"data":[80,113,127],"points":[81],"while":[82],"remaining":[83],"compliant,":[85],"challenge":[89],"distributing":[91],"this":[92,137],"process.":[93],"Our":[94],"experiments,":[95],"on":[96,110,125],"both":[97],"synthetic":[98],"real":[100],"world":[101],"data,":[102],"illustrate":[103],"performance":[106],"our":[108,139],"approach":[109,140],"points.":[114],"The":[115],"algorithm":[117],"does":[118],"not":[119],"scale":[120],"has":[122],"limit":[124],"100K":[126],"points,":[128],"where":[129],"it":[130],"needs":[131,141],"more":[132],"than":[133,143],"7":[134],"hours.":[135],"In":[136],"case,":[138],"less":[142],"30":[144],"seconds.":[145]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":2},{"year":2021,"cited_by_count":2},{"year":2019,"cited_by_count":2}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
