{"id":"https://openalex.org/W2070670538","doi":"https://doi.org/10.1145/1008992.1009029","title":"Document clustering by concept factorization","display_name":"Document clustering by concept factorization","publication_year":2004,"publication_date":"2004-07-25","ids":{"openalex":"https://openalex.org/W2070670538","doi":"https://doi.org/10.1145/1008992.1009029","mag":"2070670538"},"language":"en","primary_location":{"id":"doi:10.1145/1008992.1009029","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1008992.1009029","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 27th annual international ACM SIGIR conference on Research and development in information retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083556808","display_name":"Wei Xu","orcid":"https://orcid.org/0000-0002-1670-5174"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wei Xu","raw_affiliation_strings":["NEC Laboratories America, Inc., Cupertino, CA"],"affiliations":[{"raw_affiliation_string":"NEC Laboratories America, Inc., Cupertino, CA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100687952","display_name":"Yihong Gong","orcid":"https://orcid.org/0000-0002-1793-5836"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yihong Gong","raw_affiliation_strings":["NEC Laboratories America, Inc., Cupertino, CA"],"affiliations":[{"raw_affiliation_string":"NEC Laboratories America, Inc., Cupertino, CA","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5083556808"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":5.2723,"has_fulltext":false,"cited_by_count":309,"citation_normalized_percentile":{"value":0.96045566,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"202","last_page":"209"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10637","display_name":"Advanced Clustering Algorithms Research","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.7987266778945923},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.653723418712616},{"id":"https://openalex.org/keywords/data-point","display_name":"Data point","score":0.5676019191741943},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.5523661375045776},{"id":"https://openalex.org/keywords/non-negative-matrix-factorization","display_name":"Non-negative matrix factorization","score":0.5154953598976135},{"id":"https://openalex.org/keywords/cure-data-clustering-algorithm","display_name":"CURE data clustering algorithm","score":0.5044625997543335},{"id":"https://openalex.org/keywords/correlation-clustering","display_name":"Correlation clustering","score":0.4802694320678711},{"id":"https://openalex.org/keywords/matrix-decomposition","display_name":"Matrix decomposition","score":0.446372389793396},{"id":"https://openalex.org/keywords/data-stream-clustering","display_name":"Data stream clustering","score":0.4181441366672516},{"id":"https://openalex.org/keywords/factorization","display_name":"Factorization","score":0.4148101806640625},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.41338834166526794},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4045770764350891},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.30588823556900024},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.28788435459136963},{"id":"https://openalex.org/keywords/eigenvalues-and-eigenvectors","display_name":"Eigenvalues and eigenvectors","score":0.0823611319065094},{"id":"https://openalex.org/keywords/discrete-mathematics","display_name":"Discrete mathematics","score":0.06806075572967529}],"concepts":[{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.7987266778945923},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.653723418712616},{"id":"https://openalex.org/C21080849","wikidata":"https://www.wikidata.org/wiki/Q13611879","display_name":"Data point","level":2,"score":0.5676019191741943},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.5523661375045776},{"id":"https://openalex.org/C152671427","wikidata":"https://www.wikidata.org/wiki/Q10843505","display_name":"Non-negative matrix factorization","level":4,"score":0.5154953598976135},{"id":"https://openalex.org/C33704608","wikidata":"https://www.wikidata.org/wiki/Q5014717","display_name":"CURE data clustering algorithm","level":4,"score":0.5044625997543335},{"id":"https://openalex.org/C94641424","wikidata":"https://www.wikidata.org/wiki/Q5172845","display_name":"Correlation clustering","level":3,"score":0.4802694320678711},{"id":"https://openalex.org/C42355184","wikidata":"https://www.wikidata.org/wiki/Q1361088","display_name":"Matrix decomposition","level":3,"score":0.446372389793396},{"id":"https://openalex.org/C193143536","wikidata":"https://www.wikidata.org/wiki/Q5227360","display_name":"Data stream clustering","level":5,"score":0.4181441366672516},{"id":"https://openalex.org/C187834632","wikidata":"https://www.wikidata.org/wiki/Q188804","display_name":"Factorization","level":2,"score":0.4148101806640625},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.41338834166526794},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4045770764350891},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.30588823556900024},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.28788435459136963},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.0823611319065094},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.06806075572967529},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1008992.1009029","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1008992.1009029","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 27th annual international ACM SIGIR conference on Research and development in information retrieval","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W1790954942","https://openalex.org/W1902027874","https://openalex.org/W2013029404","https://openalex.org/W2030951871","https://openalex.org/W2052819443","https://openalex.org/W2064580901","https://openalex.org/W2064742743","https://openalex.org/W2108995755","https://openalex.org/W2118388177","https://openalex.org/W2121947440","https://openalex.org/W2133576408","https://openalex.org/W2134737843","https://openalex.org/W2138103367","https://openalex.org/W2139850885","https://openalex.org/W2148070710","https://openalex.org/W2155754954","https://openalex.org/W2165874743","https://openalex.org/W2999905431","https://openalex.org/W4234920499","https://openalex.org/W6680905637","https://openalex.org/W6684578312"],"related_works":["https://openalex.org/W2491448268","https://openalex.org/W2559422900","https://openalex.org/W2892323093","https://openalex.org/W3144143113","https://openalex.org/W2394193399","https://openalex.org/W2181939267","https://openalex.org/W2390610678","https://openalex.org/W3071522575","https://openalex.org/W2363054820","https://openalex.org/W2160785859"],"abstract_inverted_index":{"In":[0,158],"this":[1,37,54],"paper,":[2],"we":[3,149],"propose":[4],"a":[5,18,30],"new":[6,165],"data":[7,23,27,40,73,80,113,134],"clustering":[8,41,98,135,179],"method":[9,92,96,119,136,166],"called":[10],"concept":[11,16],"factorization":[12,103],"that":[13,66,107,131,148],"models":[14],"each":[15,26,79],"as":[17,29],"linear":[19,31,51,55,89],"combination":[20,32],"of":[21,33,50,71,78,97,177],"the":[22,34,39,47,63,68,72,87,95,118,124,132,164,169,178],"points,":[24],"and":[25,53,117,137,145,155,174],"point":[28,81],"concepts.":[35],"With":[36],"model,":[38],"task":[42],"is":[43,58],"accomplished":[44],"by":[45,61],"computing":[46],"two":[48],"sets":[49],"coefficients,":[52],"coefficients":[56],"computation":[57],"carried":[59],"out":[60],"finding":[62],"non-negative":[64,101],"solution":[65],"minimizes":[67],"reconstruction":[69],"error":[70],"points.":[74],"The":[75],"cluster":[76],"label":[77],"can":[82,109,120],"be":[83,110,121],"easily":[84],"derived":[85],"from":[86,94],"obtained":[88],"coefficients.":[90],"This":[91],"differs":[93],"based":[99],"on":[100,152],"matrix":[102],"(NMF)":[104],"\\citeXu03":[105],"in":[106,123,171],"it":[108],"applied":[111],"to":[112,160],"containing":[114],"negative":[115],"values":[116],"implemented":[122],"kernel":[125],"space.":[126],"Our":[127],"experimental":[128],"results":[129],"show":[130],"proposed":[133],"its":[138,161,172],"variations":[139,147],"performs":[140],"best":[141],"among":[142],"11":[143],"algorithms":[144],"their":[146],"have":[150],"evaluated":[151],"both":[153],"TDT2":[154],"Reuters-21578":[156],"corpus.":[157],"addition":[159],"good":[162],"performance,":[163],"also":[167],"has":[168],"merit":[170],"easy":[173],"reliable":[175],"derivation":[176],"results.":[180]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":22},{"year":2024,"cited_by_count":19},{"year":2023,"cited_by_count":16},{"year":2022,"cited_by_count":20},{"year":2021,"cited_by_count":20},{"year":2020,"cited_by_count":18},{"year":2019,"cited_by_count":19},{"year":2018,"cited_by_count":22},{"year":2017,"cited_by_count":19},{"year":2016,"cited_by_count":19},{"year":2015,"cited_by_count":19},{"year":2014,"cited_by_count":14},{"year":2013,"cited_by_count":17},{"year":2012,"cited_by_count":12}],"updated_date":"2026-03-27T14:29:43.386196","created_date":"2025-10-10T00:00:00"}
