{"id":"https://openalex.org/W3034101180","doi":"https://doi.org/10.1109/eisic49498.2019.9108898","title":"A comparative study of clustering methods using word embeddings","display_name":"A comparative study of clustering methods using word embeddings","publication_year":2019,"publication_date":"2019-11-01","ids":{"openalex":"https://openalex.org/W3034101180","doi":"https://doi.org/10.1109/eisic49498.2019.9108898","mag":"3034101180"},"language":"en","primary_location":{"id":"doi:10.1109/eisic49498.2019.9108898","is_oa":false,"landing_page_url":"https://doi.org/10.1109/eisic49498.2019.9108898","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 European Intelligence and Security Informatics Conference (EISIC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5051296136","display_name":"Nikolaos Bastas","orcid":null},"institutions":[{"id":"https://openalex.org/I4210134249","display_name":"Centre for Research and Technology Hellas","ror":"https://ror.org/03bndpq63","country_code":"GR","type":"facility","lineage":["https://openalex.org/I4210134249"]},{"id":"https://openalex.org/I4210093649","display_name":"Information Technologies Institute","ror":"https://ror.org/0069akp70","country_code":"GR","type":"nonprofit","lineage":["https://openalex.org/I4210093649"]}],"countries":["GR"],"is_corresponding":true,"raw_author_name":"Nikolaos Bastas","raw_affiliation_strings":["Information Technologies Institute, Centre for Research and Technology Hellas,Thessaloniki,Greece","Information Technologies Institute, Centre for Research and Technology Hellas, Thessaloniki, Greece"],"affiliations":[{"raw_affiliation_string":"Information Technologies Institute, Centre for Research and Technology Hellas,Thessaloniki,Greece","institution_ids":["https://openalex.org/I4210093649","https://openalex.org/I4210134249"]},{"raw_affiliation_string":"Information Technologies Institute, Centre for Research and Technology Hellas, Thessaloniki, Greece","institution_ids":["https://openalex.org/I4210093649","https://openalex.org/I4210134249"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027935561","display_name":"George Kalpakis","orcid":"https://orcid.org/0000-0002-0862-0074"},"institutions":[{"id":"https://openalex.org/I4210134249","display_name":"Centre for Research and Technology Hellas","ror":"https://ror.org/03bndpq63","country_code":"GR","type":"facility","lineage":["https://openalex.org/I4210134249"]},{"id":"https://openalex.org/I4210093649","display_name":"Information Technologies Institute","ror":"https://ror.org/0069akp70","country_code":"GR","type":"nonprofit","lineage":["https://openalex.org/I4210093649"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"George Kalpakis","raw_affiliation_strings":["Information Technologies Institute, Centre for Research and Technology Hellas,Thessaloniki,Greece","Information Technologies Institute, Centre for Research and Technology Hellas, Thessaloniki, Greece"],"affiliations":[{"raw_affiliation_string":"Information Technologies Institute, Centre for Research and Technology Hellas,Thessaloniki,Greece","institution_ids":["https://openalex.org/I4210093649","https://openalex.org/I4210134249"]},{"raw_affiliation_string":"Information Technologies Institute, Centre for Research and Technology Hellas, Thessaloniki, Greece","institution_ids":["https://openalex.org/I4210093649","https://openalex.org/I4210134249"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045426580","display_name":"Theodora Tsikrika","orcid":"https://orcid.org/0000-0003-4148-9028"},"institutions":[{"id":"https://openalex.org/I4210093649","display_name":"Information Technologies Institute","ror":"https://ror.org/0069akp70","country_code":"GR","type":"nonprofit","lineage":["https://openalex.org/I4210093649"]},{"id":"https://openalex.org/I4210134249","display_name":"Centre for Research and Technology Hellas","ror":"https://ror.org/03bndpq63","country_code":"GR","type":"facility","lineage":["https://openalex.org/I4210134249"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Theodora Tsikrika","raw_affiliation_strings":["Information Technologies Institute, Centre for Research and Technology Hellas,Thessaloniki,Greece","Information Technologies Institute, Centre for Research and Technology Hellas, Thessaloniki, Greece"],"affiliations":[{"raw_affiliation_string":"Information Technologies Institute, Centre for Research and Technology Hellas,Thessaloniki,Greece","institution_ids":["https://openalex.org/I4210093649","https://openalex.org/I4210134249"]},{"raw_affiliation_string":"Information Technologies Institute, Centre for Research and Technology Hellas, Thessaloniki, Greece","institution_ids":["https://openalex.org/I4210093649","https://openalex.org/I4210134249"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065313479","display_name":"Stefanos Vrochidis","orcid":"https://orcid.org/0000-0002-2505-9178"},"institutions":[{"id":"https://openalex.org/I4210093649","display_name":"Information Technologies Institute","ror":"https://ror.org/0069akp70","country_code":"GR","type":"nonprofit","lineage":["https://openalex.org/I4210093649"]},{"id":"https://openalex.org/I4210134249","display_name":"Centre for Research and Technology Hellas","ror":"https://ror.org/03bndpq63","country_code":"GR","type":"facility","lineage":["https://openalex.org/I4210134249"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Stefanos Vrochidis","raw_affiliation_strings":["Information Technologies Institute, Centre for Research and Technology Hellas,Thessaloniki,Greece","Information Technologies Institute, Centre for Research and Technology Hellas, Thessaloniki, Greece"],"affiliations":[{"raw_affiliation_string":"Information Technologies Institute, Centre for Research and Technology Hellas,Thessaloniki,Greece","institution_ids":["https://openalex.org/I4210093649","https://openalex.org/I4210134249"]},{"raw_affiliation_string":"Information Technologies Institute, Centre for Research and Technology Hellas, Thessaloniki, Greece","institution_ids":["https://openalex.org/I4210093649","https://openalex.org/I4210134249"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5084122016","display_name":"Ioannis Kompatsiaris","orcid":"https://orcid.org/0000-0001-6447-9020"},"institutions":[{"id":"https://openalex.org/I4210093649","display_name":"Information Technologies Institute","ror":"https://ror.org/0069akp70","country_code":"GR","type":"nonprofit","lineage":["https://openalex.org/I4210093649"]},{"id":"https://openalex.org/I4210134249","display_name":"Centre for Research and Technology Hellas","ror":"https://ror.org/03bndpq63","country_code":"GR","type":"facility","lineage":["https://openalex.org/I4210134249"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Ioannis Kompatsiaris","raw_affiliation_strings":["Information Technologies Institute, Centre for Research and Technology Hellas,Thessaloniki,Greece","Information Technologies Institute, Centre for Research and Technology Hellas, Thessaloniki, Greece"],"affiliations":[{"raw_affiliation_string":"Information Technologies Institute, Centre for Research and Technology Hellas,Thessaloniki,Greece","institution_ids":["https://openalex.org/I4210093649","https://openalex.org/I4210134249"]},{"raw_affiliation_string":"Information Technologies Institute, Centre for Research and Technology Hellas, Thessaloniki, Greece","institution_ids":["https://openalex.org/I4210093649","https://openalex.org/I4210134249"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5051296136"],"corresponding_institution_ids":["https://openalex.org/I4210093649","https://openalex.org/I4210134249"],"apc_list":null,"apc_paid":null,"fwci":0.2896,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.57114737,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"54","last_page":"61"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10064","display_name":"Complex Network Analysis Techniques","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10064","display_name":"Complex Network Analysis Techniques","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10637","display_name":"Advanced Clustering Algorithms Research","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/latent-dirichlet-allocation","display_name":"Latent Dirichlet allocation","score":0.8549747467041016},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.8257113695144653},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7055639624595642},{"id":"https://openalex.org/keywords/fuzzy-clustering","display_name":"Fuzzy clustering","score":0.5170312523841858},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5120142102241516},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5085660219192505},{"id":"https://openalex.org/keywords/topic-model","display_name":"Topic model","score":0.4842388331890106},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.43899327516555786},{"id":"https://openalex.org/keywords/word-embedding","display_name":"Word embedding","score":0.43446528911590576},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.42428505420684814},{"id":"https://openalex.org/keywords/document-clustering","display_name":"Document clustering","score":0.4226763844490051},{"id":"https://openalex.org/keywords/hierarchical-clustering","display_name":"Hierarchical clustering","score":0.41813090443611145},{"id":"https://openalex.org/keywords/paragraph","display_name":"Paragraph","score":0.4143880009651184},{"id":"https://openalex.org/keywords/clustering-high-dimensional-data","display_name":"Clustering high-dimensional data","score":0.41247814893722534},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.40848392248153687},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.3304332494735718},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.18695563077926636}],"concepts":[{"id":"https://openalex.org/C500882744","wikidata":"https://www.wikidata.org/wiki/Q269236","display_name":"Latent Dirichlet allocation","level":3,"score":0.8549747467041016},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.8257113695144653},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7055639624595642},{"id":"https://openalex.org/C17212007","wikidata":"https://www.wikidata.org/wiki/Q5511111","display_name":"Fuzzy clustering","level":3,"score":0.5170312523841858},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5120142102241516},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5085660219192505},{"id":"https://openalex.org/C171686336","wikidata":"https://www.wikidata.org/wiki/Q3532085","display_name":"Topic model","level":2,"score":0.4842388331890106},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.43899327516555786},{"id":"https://openalex.org/C2777462759","wikidata":"https://www.wikidata.org/wiki/Q18395344","display_name":"Word embedding","level":3,"score":0.43446528911590576},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.42428505420684814},{"id":"https://openalex.org/C177937566","wikidata":"https://www.wikidata.org/wiki/Q4223102","display_name":"Document clustering","level":3,"score":0.4226763844490051},{"id":"https://openalex.org/C92835128","wikidata":"https://www.wikidata.org/wiki/Q1277447","display_name":"Hierarchical clustering","level":3,"score":0.41813090443611145},{"id":"https://openalex.org/C2777206241","wikidata":"https://www.wikidata.org/wiki/Q194431","display_name":"Paragraph","level":2,"score":0.4143880009651184},{"id":"https://openalex.org/C184509293","wikidata":"https://www.wikidata.org/wiki/Q5136711","display_name":"Clustering high-dimensional data","level":3,"score":0.41247814893722534},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.40848392248153687},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.3304332494735718},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.18695563077926636},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/eisic49498.2019.9108898","is_oa":false,"landing_page_url":"https://doi.org/10.1109/eisic49498.2019.9108898","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 European Intelligence and Security Informatics Conference (EISIC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.800000011920929,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W1499253590","https://openalex.org/W1524787014","https://openalex.org/W1662133657","https://openalex.org/W1669302834","https://openalex.org/W1673310716","https://openalex.org/W1790954942","https://openalex.org/W1880262756","https://openalex.org/W1992009922","https://openalex.org/W1995450389","https://openalex.org/W1996510517","https://openalex.org/W2004192095","https://openalex.org/W2016381774","https://openalex.org/W2033403400","https://openalex.org/W2073459066","https://openalex.org/W2120688485","https://openalex.org/W2131681506","https://openalex.org/W2131744502","https://openalex.org/W2150593711","https://openalex.org/W2153579005","https://openalex.org/W2162833336","https://openalex.org/W2167428023","https://openalex.org/W2250539671","https://openalex.org/W2254387803","https://openalex.org/W2341171179","https://openalex.org/W2901052165","https://openalex.org/W2938824800","https://openalex.org/W2951943225","https://openalex.org/W2963083182","https://openalex.org/W2966207845","https://openalex.org/W2998704965","https://openalex.org/W3099768174","https://openalex.org/W4231510805","https://openalex.org/W4233135949","https://openalex.org/W4235169531","https://openalex.org/W4235539094","https://openalex.org/W4244030505","https://openalex.org/W4285719527","https://openalex.org/W4294170691","https://openalex.org/W6637131181","https://openalex.org/W6637216817","https://openalex.org/W6639619044","https://openalex.org/W6668990524","https://openalex.org/W6679775712","https://openalex.org/W6680532216","https://openalex.org/W6682691769","https://openalex.org/W6684050148","https://openalex.org/W6691749348","https://openalex.org/W6761549384"],"related_works":["https://openalex.org/W2769501189","https://openalex.org/W4315588616","https://openalex.org/W4312773271","https://openalex.org/W2888805565","https://openalex.org/W2962686197","https://openalex.org/W2207653751","https://openalex.org/W3005513013","https://openalex.org/W2611137333","https://openalex.org/W4309228610","https://openalex.org/W4294597112"],"abstract_inverted_index":{"Grouping":[0],"large":[1],"amounts":[2],"of":[3,13,19,28],"data":[4],"is":[5],"critical":[6],"for":[7],"various":[8],"tasks,":[9],"including":[10],"the":[11,53],"identification":[12],"content":[14],"on":[15,101],"a":[16,26,62],"specific":[17],"topic":[18,42],"interest":[20],"(such":[21],"as":[22,80,122],"terrorism-related":[23],"content)":[24],"within":[25],"collection":[27],"material":[29],"gathered":[30],"from":[31],"online":[32],"sources.":[33],"Various":[34],"existing":[35],"approaches":[36],"typically":[37],"extract":[38],"relevant":[39],"features":[40],"using":[41,65,116,139],"distributions":[43],"and/or":[44],"embedding":[45],"methods,":[46,82],"and":[47,74,99,105,127],"subsequently":[48],"apply":[49],"clustering":[50,88,98,118,146],"techniques":[51],"in":[52,83,142,148],"derived":[54],"representation":[55,81],"space.":[56],"In":[57],"this":[58],"work,":[59],"we":[60],"present":[61],"comparative":[63],"study":[64],"Latent":[66],"Dirichlet":[67],"Allocation":[68],"(LDA),":[69],"Paragraph-Vector":[70,75],"Distributed":[71,76],"Bag-of-Words":[72],"(PV-DBOW),":[73],"Memory":[77],"(PV-DM)":[78],"models":[79],"conjunction":[84],"with":[85],"five":[86],"traditional":[87],"algorithms,":[89],"namely":[90],"k-means,":[91,93],"spherical":[92],"possibilistic":[94],"fuzzy":[95],"c-means,":[96],"agglomerative":[97],"NMF,":[100],"two":[102],"publicly":[103],"available":[104,133],"one":[106],"proprietary":[107],"datasets.":[108,150],"Fifteen":[109],"combinations":[110],"are":[111,114],"formed":[112],"which":[113],"assessed":[115],"external":[117],"validity":[119],"measures,":[120],"such":[121],"Adjusted":[123,128],"Mutual":[124],"Information":[125],"(AMI)":[126],"Rand":[129],"Index":[130],"(ARI)":[131],"against":[132],"ground-truth.":[134],"Our":[135],"results":[136],"indicate":[137],"that":[138],"PV-DBOW":[140],"leads":[141],"general":[143],"to":[144],"better":[145],"performance":[147],"all":[149]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
