{"id":"https://openalex.org/W2132360421","doi":"https://doi.org/10.1109/icdm.2003.1250907","title":"Scalable model-based clustering by working on data summaries","display_name":"Scalable model-based clustering by working on data summaries","publication_year":2003,"publication_date":"2003-01-01","ids":{"openalex":"https://openalex.org/W2132360421","doi":"https://doi.org/10.1109/icdm.2003.1250907","mag":"2132360421"},"language":"en","primary_location":{"id":"doi:10.1109/icdm.2003.1250907","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icdm.2003.1250907","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Third IEEE International Conference on Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007462806","display_name":"Huidong Jin","orcid":"https://orcid.org/0000-0002-3925-0256"},"institutions":[{"id":"https://openalex.org/I165488957","display_name":"Lingnan University","ror":"https://ror.org/0563pg902","country_code":"HK","type":"education","lineage":["https://openalex.org/I165488957"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Huidong Jin","raw_affiliation_strings":["Dept. of Inf. Syst., Lingnan Univ., Tuen Mun, China","Dept. of Inf. Syst., Lingnan Univ., Tuen Mun, China#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dept. of Inf. Syst., Lingnan Univ., Tuen Mun, China","institution_ids":["https://openalex.org/I165488957"]},{"raw_affiliation_string":"Dept. of Inf. Syst., Lingnan Univ., Tuen Mun, China#TAB#","institution_ids":["https://openalex.org/I165488957"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002024488","display_name":"Man\u2013Leung Wong","orcid":"https://orcid.org/0000-0002-4364-6747"},"institutions":[{"id":"https://openalex.org/I165488957","display_name":"Lingnan University","ror":"https://ror.org/0563pg902","country_code":"HK","type":"education","lineage":["https://openalex.org/I165488957"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Man-Leung Wong","raw_affiliation_strings":["Dept. of Inf. Syst., Lingnan Univ., Tuen Mun, China","Dept. of Inf. Syst., Lingnan Univ., Tuen Mun, China#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dept. of Inf. Syst., Lingnan Univ., Tuen Mun, China","institution_ids":["https://openalex.org/I165488957"]},{"raw_affiliation_string":"Dept. of Inf. Syst., Lingnan Univ., Tuen Mun, China#TAB#","institution_ids":["https://openalex.org/I165488957"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082233800","display_name":"Kwong\u2010Sak Leung","orcid":null},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"HK","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Kwong-Sak Leung","raw_affiliation_strings":["Department of Computer Science & Engineering, Chinese University of Hong Kong, New Territories, Hong Kong, China","The Chinese University of Hong Kong"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science & Engineering, Chinese University of Hong Kong, New Territories, Hong Kong, China","institution_ids":["https://openalex.org/I177725633"]},{"raw_affiliation_string":"The Chinese University of Hong Kong","institution_ids":["https://openalex.org/I177725633"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.4065,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.8566526,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"91","last_page":"98"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10637","display_name":"Advanced Clustering Algorithms Research","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10637","display_name":"Advanced Clustering Algorithms Research","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7794015407562256},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.777860164642334},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6569037437438965},{"id":"https://openalex.org/keywords/automatic-summarization","display_name":"Automatic summarization","score":0.6064246892929077},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.597377598285675},{"id":"https://openalex.org/keywords/expectation\u2013maximization-algorithm","display_name":"Expectation\u2013maximization algorithm","score":0.5470713376998901},{"id":"https://openalex.org/keywords/cardinality","display_name":"Cardinality (data modeling)","score":0.4817473888397217},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.47821423411369324},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.29926857352256775},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1378888487815857},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.10849320888519287},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.10418775677680969}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7794015407562256},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.777860164642334},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6569037437438965},{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.6064246892929077},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.597377598285675},{"id":"https://openalex.org/C182081679","wikidata":"https://www.wikidata.org/wiki/Q1275153","display_name":"Expectation\u2013maximization algorithm","level":3,"score":0.5470713376998901},{"id":"https://openalex.org/C87117476","wikidata":"https://www.wikidata.org/wiki/Q362383","display_name":"Cardinality (data modeling)","level":2,"score":0.4817473888397217},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.47821423411369324},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.29926857352256775},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1378888487815857},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.10849320888519287},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.10418775677680969},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C49781872","wikidata":"https://www.wikidata.org/wiki/Q1045555","display_name":"Maximum likelihood","level":2,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/icdm.2003.1250907","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icdm.2003.1250907","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Third IEEE International Conference on Data Mining","raw_type":"proceedings-article"},{"id":"pmh:oai:commons.ln.edu.hk:sw_master-7887","is_oa":false,"landing_page_url":"https://commons.ln.edu.hk/sw_master/6819","pdf_url":null,"source":{"id":"https://openalex.org/S4377196536","display_name":"Digital Commons - Lingnan (Lingnan University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I165488957","host_organization_name":"Lingnan University","host_organization_lineage":["https://openalex.org/I165488957"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Staff Publications","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.123.2079","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.123.2079","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://cptra.ln.edu.hk/~mlwong/conference/icdm2003.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W208128215","https://openalex.org/W1524704912","https://openalex.org/W1575476631","https://openalex.org/W1995639191","https://openalex.org/W1997200791","https://openalex.org/W1999349314","https://openalex.org/W2068289711","https://openalex.org/W2082583687","https://openalex.org/W2116929951","https://openalex.org/W2117853077","https://openalex.org/W2121328882","https://openalex.org/W2127042504","https://openalex.org/W2140190241","https://openalex.org/W2141245797","https://openalex.org/W2152255870","https://openalex.org/W2212819718","https://openalex.org/W2413021498","https://openalex.org/W4232023503","https://openalex.org/W4255446233","https://openalex.org/W6631241307","https://openalex.org/W6634394891","https://openalex.org/W6682557382"],"related_works":["https://openalex.org/W2366403280","https://openalex.org/W1495108544","https://openalex.org/W3148229873","https://openalex.org/W4242223894","https://openalex.org/W2091301346","https://openalex.org/W2150160875","https://openalex.org/W1517524280","https://openalex.org/W4306886878","https://openalex.org/W4323520239","https://openalex.org/W3111372071"],"abstract_inverted_index":{"The":[0,97],"scalability":[1],"problem":[2],"in":[3],"data":[4,30,83],"mining":[5],"involves":[6],"the":[7,43,110],"development":[8],"of":[9,46,76,106,117],"methods":[10],"for":[11,57],"handling":[12],"large":[13,29],"databases":[14],"with":[15,81,114],"limited":[16],"computational":[17,131],"resources.":[18,132],"We":[19],"present":[20],"a":[21,28,49,63],"two-phase":[22],"scalable":[23],"model-based":[24,126],"clustering":[25,92,127],"framework:":[26],"first,":[27],"set":[31],"is":[32,87],"summed":[33],"up":[34],"into":[35],"subclusters;":[36],"Then,":[37],"clusters":[38],"are":[39],"directly":[40],"generated":[41],"from":[42],"summary":[44],"statistics":[45],"subclusters":[47],"by":[48],"specifically":[50],"designed":[51],"expectation-maximization":[52],"(EM)":[53],"algorithm.":[54],"Taking":[55],"example":[56],"Gaussian":[58],"mixture":[59],"models,":[60],"we":[61],"establish":[62],"provably":[64],"convergent":[65],"EM":[66,112],"algorithm,":[67],"EMADS,":[68],"which":[69],"embodies":[70],"cardinality,":[71],"mean,":[72],"and":[73,95],"covariance":[74],"information":[75],"each":[77],"subcluster":[78],"explicitly.":[79],"Combining":[80],"different":[82],"summarization":[84],"procedures,":[85],"EMADS":[86],"used":[88],"to":[89],"construct":[90],"two":[91],"systems:":[93],"gEMADS":[94],"bEMADS.":[96],"experimental":[98],"results":[99,123],"demonstrate":[100],"that":[101],"they":[102],"run":[103],"several":[104],"orders":[105],"magnitude":[107],"faster":[108],"than":[109,124],"classic":[111],"algorithm":[113],"little":[115],"loss":[116],"accuracy.":[118],"They":[119],"generate":[120],"significantly":[121],"better":[122],"other":[125],"systems":[128],"using":[129],"similar":[130]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
