{"id":"https://openalex.org/W2000982728","doi":"https://doi.org/10.1145/1242572.1242590","title":"A new suffix tree similarity measure for document clustering","display_name":"A new suffix tree similarity measure for document clustering","publication_year":2007,"publication_date":"2007-05-08","ids":{"openalex":"https://openalex.org/W2000982728","doi":"https://doi.org/10.1145/1242572.1242590","mag":"2000982728"},"language":"en","primary_location":{"id":"doi:10.1145/1242572.1242590","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1242572.1242590","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 16th international conference on World Wide Web","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5054302270","display_name":"Hung Chim","orcid":null},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Hung Chim","raw_affiliation_strings":["City University of Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"City University of Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100638710","display_name":"Xiaotie Deng","orcid":"https://orcid.org/0000-0002-5282-6467"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Xiaotie Deng","raw_affiliation_strings":["City University of Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"City University of Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I168719708"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5054302270"],"corresponding_institution_ids":["https://openalex.org/I168719708"],"apc_list":null,"apc_paid":null,"fwci":14.0804,"has_fulltext":false,"cited_by_count":108,"citation_normalized_percentile":{"value":0.98813984,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"121","last_page":"130"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.7637925744056702},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6998367309570312},{"id":"https://openalex.org/keywords/document-clustering","display_name":"Document clustering","score":0.6708047986030579},{"id":"https://openalex.org/keywords/hierarchical-clustering","display_name":"Hierarchical clustering","score":0.5934500694274902},{"id":"https://openalex.org/keywords/similarity-measure","display_name":"Similarity measure","score":0.5688750147819519},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5240455269813538},{"id":"https://openalex.org/keywords/suffix-tree","display_name":"Suffix tree","score":0.5055279731750488},{"id":"https://openalex.org/keywords/suffix","display_name":"Suffix","score":0.49139657616615295},{"id":"https://openalex.org/keywords/fuzzy-clustering","display_name":"Fuzzy clustering","score":0.48781564831733704},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4563825726509094},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.4478133022785187},{"id":"https://openalex.org/keywords/single-linkage-clustering","display_name":"Single-linkage clustering","score":0.4475720524787903},{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.44587212800979614},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4431379437446594},{"id":"https://openalex.org/keywords/correlation-clustering","display_name":"Correlation clustering","score":0.43261200189590454},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.41623297333717346},{"id":"https://openalex.org/keywords/canopy-clustering-algorithm","display_name":"Canopy clustering algorithm","score":0.3904992341995239},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.21444886922836304},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.14098510146141052},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.058075517416000366}],"concepts":[{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.7637925744056702},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6998367309570312},{"id":"https://openalex.org/C177937566","wikidata":"https://www.wikidata.org/wiki/Q4223102","display_name":"Document clustering","level":3,"score":0.6708047986030579},{"id":"https://openalex.org/C92835128","wikidata":"https://www.wikidata.org/wiki/Q1277447","display_name":"Hierarchical clustering","level":3,"score":0.5934500694274902},{"id":"https://openalex.org/C2776517306","wikidata":"https://www.wikidata.org/wiki/Q29017317","display_name":"Similarity measure","level":2,"score":0.5688750147819519},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5240455269813538},{"id":"https://openalex.org/C2781166958","wikidata":"https://www.wikidata.org/wiki/Q1426863","display_name":"Suffix tree","level":3,"score":0.5055279731750488},{"id":"https://openalex.org/C2779804580","wikidata":"https://www.wikidata.org/wiki/Q102047","display_name":"Suffix","level":2,"score":0.49139657616615295},{"id":"https://openalex.org/C17212007","wikidata":"https://www.wikidata.org/wiki/Q5511111","display_name":"Fuzzy clustering","level":3,"score":0.48781564831733704},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4563825726509094},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.4478133022785187},{"id":"https://openalex.org/C22648726","wikidata":"https://www.wikidata.org/wiki/Q7523744","display_name":"Single-linkage clustering","level":5,"score":0.4475720524787903},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.44587212800979614},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4431379437446594},{"id":"https://openalex.org/C94641424","wikidata":"https://www.wikidata.org/wiki/Q5172845","display_name":"Correlation clustering","level":3,"score":0.43261200189590454},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.41623297333717346},{"id":"https://openalex.org/C104047586","wikidata":"https://www.wikidata.org/wiki/Q5033439","display_name":"Canopy clustering algorithm","level":4,"score":0.3904992341995239},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.21444886922836304},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.14098510146141052},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.058075517416000366},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1242572.1242590","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1242572.1242590","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 16th international conference on World Wide Web","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W89608058","https://openalex.org/W1023337223","https://openalex.org/W1483301080","https://openalex.org/W1488723011","https://openalex.org/W1508165687","https://openalex.org/W1521439890","https://openalex.org/W1550450643","https://openalex.org/W1591171773","https://openalex.org/W1615772642","https://openalex.org/W1660390307","https://openalex.org/W1680839992","https://openalex.org/W1750362510","https://openalex.org/W1755881643","https://openalex.org/W1873415621","https://openalex.org/W1920790656","https://openalex.org/W1978394996","https://openalex.org/W1982026570","https://openalex.org/W1994851566","https://openalex.org/W1995552834","https://openalex.org/W2008434289","https://openalex.org/W2021809202","https://openalex.org/W2022828110","https://openalex.org/W2035695027","https://openalex.org/W2059513841","https://openalex.org/W2070412788","https://openalex.org/W2071664212","https://openalex.org/W2087710618","https://openalex.org/W2096152098","https://openalex.org/W2100958137","https://openalex.org/W2116091861","https://openalex.org/W2121996546","https://openalex.org/W2137763598","https://openalex.org/W2145036943","https://openalex.org/W2150102617","https://openalex.org/W2158139273","https://openalex.org/W2158874082","https://openalex.org/W2160011913","https://openalex.org/W2165612380","https://openalex.org/W2226484784","https://openalex.org/W2235578426","https://openalex.org/W2482796620","https://openalex.org/W4248923238"],"related_works":["https://openalex.org/W96331545","https://openalex.org/W1548907175","https://openalex.org/W2583658747","https://openalex.org/W1882920571","https://openalex.org/W1517600056","https://openalex.org/W2159942118","https://openalex.org/W193772702","https://openalex.org/W2003608043","https://openalex.org/W2036633468","https://openalex.org/W2160738675"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3,38,103],"propose":[4],"a":[5,40,67,138],"new":[6,26,41,63,106],"similarity":[7,13,29,83],"measure":[8,30,84],"to":[9,125],"compute":[10],"the":[11,25,62,75,86,97,105,111,133,134],"pairwise":[12],"of":[14,77,94,99],"text-based":[15],"documents":[16,113,136],"based":[17],"on":[18,50,96],"suffix":[19,27,42],"tree":[20,28,43],"document":[21,44,53,70],"model.":[22],"By":[23],"applying":[24],"in":[31,85,109,114,128,137],"Group-average":[32],"Agglomerative":[33],"Hierarchical":[34],"Clustering":[35],"(GAHC)":[36],"algorithm,":[37,89],"developed":[39,124],"clustering":[45,54,64,71,107,121],"algorithm":[46,65,108,122],"(NSTC).":[47],"Experimental":[48],"results":[49,76],"two":[51],"standard":[52],"benchmark":[55],"corpus":[56],"OHSUMED":[57],"and":[58,131],"RCV1":[59],"indicate":[60],"that":[61],"is":[66,123],"very":[68],"effective":[69],"algorithm.":[72],"Comparing":[73],"with":[74],"traditional":[78],"word":[79],"term":[80],"weight":[81],"tf-idf":[82],"same":[87],"GAHC":[88],"NSTC":[90],"achieved":[91],"an":[92],"improvement":[93],"51%":[95],"average":[98],"F-measure":[100],"score.":[101],"Furthermore,":[102],"apply":[104],"analyzing":[110],"Web":[112,135],"online":[115],"forum":[116,140],"communities.":[117],"A":[118],"topic":[119],"oriented":[120],"help":[126],"people":[127],"assessing,":[129],"classifying":[130],"searching":[132],"large":[139],"community.":[141]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2022,"cited_by_count":3},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":8},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":10},{"year":2014,"cited_by_count":7},{"year":2013,"cited_by_count":14},{"year":2012,"cited_by_count":10}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
