{"id":"https://openalex.org/W2029219570","doi":"https://doi.org/10.1145/2644866.2644868","title":"An ensemble approach for text document clustering using Wikipedia concepts","display_name":"An ensemble approach for text document clustering using Wikipedia concepts","publication_year":2014,"publication_date":"2014-09-16","ids":{"openalex":"https://openalex.org/W2029219570","doi":"https://doi.org/10.1145/2644866.2644868","mag":"2029219570"},"language":"en","primary_location":{"id":"doi:10.1145/2644866.2644868","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2644866.2644868","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2014 ACM symposium on Document engineering","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113643083","display_name":"Seyednaser Nourashrafeddin","orcid":null},"institutions":[{"id":"https://openalex.org/I129902397","display_name":"Dalhousie University","ror":"https://ror.org/01e6qks80","country_code":"CA","type":"education","lineage":["https://openalex.org/I129902397"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Seyednaser Nourashrafeddin","raw_affiliation_strings":["Dalhousie University, Halifax, NS, Canada"],"affiliations":[{"raw_affiliation_string":"Dalhousie University, Halifax, NS, Canada","institution_ids":["https://openalex.org/I129902397"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013989913","display_name":"Evangelos Milios","orcid":"https://orcid.org/0000-0001-5549-4675"},"institutions":[{"id":"https://openalex.org/I129902397","display_name":"Dalhousie University","ror":"https://ror.org/01e6qks80","country_code":"CA","type":"education","lineage":["https://openalex.org/I129902397"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Evangelos Milios","raw_affiliation_strings":["Dalhousie University, Halifax, NS, Canada"],"affiliations":[{"raw_affiliation_string":"Dalhousie University, Halifax, NS, Canada","institution_ids":["https://openalex.org/I129902397"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5083914227","display_name":"Dirk V. Arnold","orcid":"https://orcid.org/0000-0001-5367-6862"},"institutions":[{"id":"https://openalex.org/I129902397","display_name":"Dalhousie University","ror":"https://ror.org/01e6qks80","country_code":"CA","type":"education","lineage":["https://openalex.org/I129902397"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Drik V. Arnold","raw_affiliation_strings":["Dalhousie University, Halifax, NS, Canada"],"affiliations":[{"raw_affiliation_string":"Dalhousie University, Halifax, NS, Canada","institution_ids":["https://openalex.org/I129902397"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5113643083"],"corresponding_institution_ids":["https://openalex.org/I129902397"],"apc_list":null,"apc_paid":null,"fwci":4.09,"has_fulltext":false,"cited_by_count":18,"citation_normalized_percentile":{"value":0.94124896,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"107","last_page":"116"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/document-clustering","display_name":"Document clustering","score":0.8631051778793335},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.76357501745224},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.7265415787696838},{"id":"https://openalex.org/keywords/term","display_name":"Term (time)","score":0.6637221574783325},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6189774870872498},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5495390892028809},{"id":"https://openalex.org/keywords/tf\u2013idf","display_name":"tf\u2013idf","score":0.5152840614318848},{"id":"https://openalex.org/keywords/text-corpus","display_name":"Text corpus","score":0.4628872573375702},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4488159716129303},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4349210858345032},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4329608380794525},{"id":"https://openalex.org/keywords/document-retrieval","display_name":"Document retrieval","score":0.411912739276886}],"concepts":[{"id":"https://openalex.org/C177937566","wikidata":"https://www.wikidata.org/wiki/Q4223102","display_name":"Document clustering","level":3,"score":0.8631051778793335},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.76357501745224},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.7265415787696838},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.6637221574783325},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6189774870872498},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5495390892028809},{"id":"https://openalex.org/C81758059","wikidata":"https://www.wikidata.org/wiki/Q796584","display_name":"tf\u2013idf","level":3,"score":0.5152840614318848},{"id":"https://openalex.org/C2474386","wikidata":"https://www.wikidata.org/wiki/Q461183","display_name":"Text corpus","level":2,"score":0.4628872573375702},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4488159716129303},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4349210858345032},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4329608380794525},{"id":"https://openalex.org/C161156560","wikidata":"https://www.wikidata.org/wiki/Q1638872","display_name":"Document retrieval","level":2,"score":0.411912739276886},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2644866.2644868","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2644866.2644868","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2014 ACM symposium on Document engineering","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.6499999761581421}],"awards":[],"funders":[{"id":"https://openalex.org/F4320324369","display_name":"Universidade Federal de Minas Gerais","ror":"https://ror.org/0176yjw32"},{"id":"https://openalex.org/F4320334593","display_name":"Natural Sciences and Engineering Research Council of Canada","ror":"https://ror.org/01h531d29"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W35869492","https://openalex.org/W40976687","https://openalex.org/W1480139009","https://openalex.org/W1501827958","https://openalex.org/W1553682320","https://openalex.org/W1573641422","https://openalex.org/W1880262756","https://openalex.org/W1970544520","https://openalex.org/W1982302133","https://openalex.org/W1988735952","https://openalex.org/W1996852448","https://openalex.org/W2021457058","https://openalex.org/W2054808906","https://openalex.org/W2066073289","https://openalex.org/W2070214199","https://openalex.org/W2088314245","https://openalex.org/W2100341149","https://openalex.org/W2113076747","https://openalex.org/W2116429057","https://openalex.org/W2133517430","https://openalex.org/W2145429017","https://openalex.org/W2157361576","https://openalex.org/W2434205482","https://openalex.org/W4213009331","https://openalex.org/W6674809819"],"related_works":["https://openalex.org/W2019737068","https://openalex.org/W2899601636","https://openalex.org/W4254379378","https://openalex.org/W2035503345","https://openalex.org/W4240717064","https://openalex.org/W4214728210","https://openalex.org/W3015674157","https://openalex.org/W4206655101","https://openalex.org/W58628767","https://openalex.org/W4237592971"],"abstract_inverted_index":{"Most":[0],"text":[1,81,88,153],"clustering":[2,83,154],"algorithms":[3],"represent":[4],"a":[5,8,72,94,98],"corpus":[6,89,106],"as":[7,93],"document-term":[9,95,113,127],"matrix":[10,96],"in":[11,26,43,79,84,104,151,156,176,184],"the":[12,61,105,112,116,126,130,135,170,189],"bag":[13],"of":[14,142,191],"words":[15],"model.":[16],"The":[17,145,163],"feature":[18],"values":[19],"are":[20,107,148],"computed":[21],"based":[22,110,124,133],"on":[23,111,125,134],"term":[24,117],"frequencies":[25],"documents":[27,40],"and":[28,97,129],"no":[29],"semantic":[30],"relatedness":[31],"between":[32],"terms":[33],"is":[34,58,90],"considered.":[35],"Therefore,":[36],"two":[37,121,140,146],"semantically":[38],"similar":[39],"may":[41],"sit":[42],"different":[44],"clusters":[45,179,193],"if":[46],"they":[47],"do":[48,173],"not":[49,174],"share":[50],"any":[51],"terms.":[52],"One":[53],"solution":[54],"to":[55,59,75,138,160],"this":[56,85],"problem":[57],"enrich":[60],"document":[62,82,178,192],"representation":[63,128],"using":[64],"an":[65,157],"external":[66],"resource":[67],"like":[68],"Wikipedia.":[69],"We":[70],"propose":[71,120],"new":[73],"way":[74],"integrate":[76],"Wikipedia":[77],"concepts":[78],"partitional":[80],"work.":[86],"A":[87],"first":[91],"represented":[92],"document-concept":[99,136,171],"matrix.":[100],"Terms":[101],"that":[102,167],"exist":[103],"then":[108,149],"clustered":[109],"representation.":[114],"Given":[115],"clusters,":[118],"we":[119],"methods,":[122],"one":[123,132],"other":[131],"representation,":[137],"find":[139],"sets":[141,147],"seed":[143],"documents.":[144,162],"used":[150],"our":[152,185],"algorithm":[155],"ensemble":[158,186],"approach":[159,187],"cluster":[161],"experimental":[164],"results":[165],"show":[166],"even":[168],"though":[169],"representations":[172],"result":[175],"good":[177],"per":[180],"se,":[181],"integrating":[182],"them":[183],"improves":[188],"quality":[190],"significantly.":[194]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":4}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
