{"id":"https://openalex.org/W3084495208","doi":"https://doi.org/10.1186/s40537-020-00344-3","title":"A set theory based similarity measure for text clustering and classification","display_name":"A set theory based similarity measure for text clustering and classification","publication_year":2020,"publication_date":"2020-09-14","ids":{"openalex":"https://openalex.org/W3084495208","doi":"https://doi.org/10.1186/s40537-020-00344-3","mag":"3084495208"},"language":"en","primary_location":{"id":"doi:10.1186/s40537-020-00344-3","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s40537-020-00344-3","pdf_url":"https://journalofbigdata.springeropen.com/track/pdf/10.1186/s40537-020-00344-3","source":{"id":"https://openalex.org/S2737955091","display_name":"Journal Of Big Data","issn_l":"2196-1115","issn":["2196-1115"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Big Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://journalofbigdata.springeropen.com/track/pdf/10.1186/s40537-020-00344-3","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5012051024","display_name":"Ali A. Amer","orcid":"https://orcid.org/0000-0002-2002-948X"},"institutions":[{"id":"https://openalex.org/I36197038","display_name":"Taiz University","ror":"https://ror.org/03jwcxq96","country_code":"YE","type":"education","lineage":["https://openalex.org/I36197038"]}],"countries":["YE"],"is_corresponding":true,"raw_author_name":"Ali A. Amer","raw_affiliation_strings":["Computer Science Department, Taiz University, Taiz, Yemen"],"raw_orcid":"https://orcid.org/0000-0002-2002-948X","affiliations":[{"raw_affiliation_string":"Computer Science Department, Taiz University, Taiz, Yemen","institution_ids":["https://openalex.org/I36197038"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5053616420","display_name":"Hassan I. Abdalla","orcid":"https://orcid.org/0000-0001-7940-9391"},"institutions":[{"id":"https://openalex.org/I91044093","display_name":"Zayed University","ror":"https://ror.org/03snqfa66","country_code":"AE","type":"education","lineage":["https://openalex.org/I91044093"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Hassan I. Abdalla","raw_affiliation_strings":["College of Technological Innovation, Zayed University, P.O. Box 144534, Abu Dhabi, UAE"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Technological Innovation, Zayed University, P.O. Box 144534, Abu Dhabi, UAE","institution_ids":["https://openalex.org/I91044093"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5012051024"],"corresponding_institution_ids":["https://openalex.org/I36197038"],"apc_list":{"value":1060,"currency":"GBP","value_usd":1300},"apc_paid":{"value":1060,"currency":"GBP","value_usd":1300},"fwci":5.2998,"has_fulltext":true,"cited_by_count":55,"citation_normalized_percentile":{"value":0.96387323,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":"7","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10064","display_name":"Complex Network Analysis Techniques","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10637","display_name":"Advanced Clustering Algorithms Research","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/automatic-summarization","display_name":"Automatic summarization","score":0.8747299909591675},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7587152719497681},{"id":"https://openalex.org/keywords/similarity-measure","display_name":"Similarity measure","score":0.7039232850074768},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.6778693795204163},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.6768018007278442},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.6339577436447144},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5366811752319336},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.497882604598999},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.49089962244033813},{"id":"https://openalex.org/keywords/document-clustering","display_name":"Document clustering","score":0.48479098081588745},{"id":"https://openalex.org/keywords/k-nearest-neighbors-algorithm","display_name":"k-nearest neighbors algorithm","score":0.4623692035675049},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4286287724971771},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.33878111839294434}],"concepts":[{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.8747299909591675},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7587152719497681},{"id":"https://openalex.org/C2776517306","wikidata":"https://www.wikidata.org/wiki/Q29017317","display_name":"Similarity measure","level":2,"score":0.7039232850074768},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.6778693795204163},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6768018007278442},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.6339577436447144},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5366811752319336},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.497882604598999},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.49089962244033813},{"id":"https://openalex.org/C177937566","wikidata":"https://www.wikidata.org/wiki/Q4223102","display_name":"Document clustering","level":3,"score":0.48479098081588745},{"id":"https://openalex.org/C113238511","wikidata":"https://www.wikidata.org/wiki/Q1071612","display_name":"k-nearest neighbors algorithm","level":2,"score":0.4623692035675049},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4286287724971771},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33878111839294434},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1186/s40537-020-00344-3","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s40537-020-00344-3","pdf_url":"https://journalofbigdata.springeropen.com/track/pdf/10.1186/s40537-020-00344-3","source":{"id":"https://openalex.org/S2737955091","display_name":"Journal Of Big Data","issn_l":"2196-1115","issn":["2196-1115"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Big Data","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:dad798d9e45f466a8c87a1f6f97a8751","is_oa":true,"landing_page_url":"https://doaj.org/article/dad798d9e45f466a8c87a1f6f97a8751","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of Big Data, Vol 7, Iss 1, Pp 1-43 (2020)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1186/s40537-020-00344-3","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s40537-020-00344-3","pdf_url":"https://journalofbigdata.springeropen.com/track/pdf/10.1186/s40537-020-00344-3","source":{"id":"https://openalex.org/S2737955091","display_name":"Journal Of Big Data","issn_l":"2196-1115","issn":["2196-1115"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Big Data","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.7900000214576721}],"awards":[{"id":"https://openalex.org/G1455995528","display_name":null,"funder_award_id":"R19093","funder_id":"https://openalex.org/F4320325295","funder_display_name":"Zayed University"}],"funders":[{"id":"https://openalex.org/F4320321145","display_name":"King Saud University","ror":"https://ror.org/02f81g417"},{"id":"https://openalex.org/F4320325295","display_name":"Zayed University","ror":"https://ror.org/03snqfa66"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3084495208.pdf","grobid_xml":"https://content.openalex.org/works/W3084495208.grobid-xml"},"referenced_works_count":33,"referenced_works":["https://openalex.org/W1522930108","https://openalex.org/W1545965399","https://openalex.org/W1606710192","https://openalex.org/W1662133657","https://openalex.org/W1965555277","https://openalex.org/W1978394996","https://openalex.org/W1997377854","https://openalex.org/W2041395528","https://openalex.org/W2057923756","https://openalex.org/W2068549609","https://openalex.org/W2102166428","https://openalex.org/W2111998194","https://openalex.org/W2113620787","https://openalex.org/W2114097960","https://openalex.org/W2123402141","https://openalex.org/W2123838014","https://openalex.org/W2125214008","https://openalex.org/W2145252566","https://openalex.org/W2153233077","https://openalex.org/W2164634022","https://openalex.org/W2189081352","https://openalex.org/W2273660186","https://openalex.org/W2296515788","https://openalex.org/W2550576290","https://openalex.org/W2736451061","https://openalex.org/W2904779692","https://openalex.org/W2906507670","https://openalex.org/W2999302483","https://openalex.org/W3006942207","https://openalex.org/W3029344712","https://openalex.org/W3110683067","https://openalex.org/W4285719527","https://openalex.org/W6694771898"],"related_works":["https://openalex.org/W2366403280","https://openalex.org/W1495108544","https://openalex.org/W2091301346","https://openalex.org/W3148229873","https://openalex.org/W2319693127","https://openalex.org/W2072263576","https://openalex.org/W2474567666","https://openalex.org/W1940044583","https://openalex.org/W2806903871","https://openalex.org/W4320802053"],"abstract_inverted_index":{"Abstract":[0],"Similarity":[1],"measures":[2,35,139,181],"have":[3],"long":[4],"been":[5,43,149],"utilized":[6],"in":[7,75,143],"information":[8],"retrieval":[9],"and":[10,25,52,64,81,88,112,127,160,188],"machine":[11],"learning":[12],"domains":[13],"for":[14,61,85,99,120,125,134],"multi-purposes":[15],"including":[16],"text":[17,19,21,86],"retrieval,":[18],"clustering,":[20,126],"summarization,":[22],"plagiarism":[23],"detection,":[24],"several":[26],"other":[27],"text-processing":[28],"applications.":[29],"However,":[30],"the":[31,55,59,91,102,115,122,128,154,167],"problem":[32],"with":[33,183],"these":[34],"is":[36,68],"that,":[37],"until":[38],"recently,":[39],"there":[40],"has":[41,148],"never":[42],"one":[44],"single":[45],"measure":[46,67,84,172],"recorded":[47],"to":[48,94,185],"be":[49],"highly":[50],"effective":[51,65],"efficient":[53,63],"at":[54],"same":[56],"time.":[57],"Thus,":[58],"quest":[60],"an":[62,70],"similarity":[66,83,106,138,171],"still":[69],"open-ended":[71],"challenge.":[72],"This":[73],"study,":[74],"consequence,":[76],"introduces":[77],"a":[78,96,175],"new":[79],"highly-effective":[80],"time-efficient":[82],"clustering":[87],"classification.":[89],"Furthermore,":[90],"study":[92],"aims":[93],"provide":[95],"comprehensive":[97],"scrutinization":[98],"seven":[100],"of":[101,130,153],"most":[103,155],"widely":[104],"used":[105],"measures,":[107],"mainly":[108],"concerning":[109],"their":[110],"effectiveness":[111,187],"efficiency.":[113,189],"Using":[114],"K-nearest":[116],"neighbor":[117],"algorithm":[118,124],"(KNN)":[119],"classification,":[121],"K-means":[123],"bag":[129],"word":[131],"(BoW)":[132],"model":[133],"feature":[135],"selection,":[136],"all":[137,179],"are":[140],"carefully":[141],"examined":[142],"detail.":[144],"The":[145,162],"experimental":[146],"evaluation":[147],"made":[150],"on":[151],"two":[152],"popular":[156],"datasets,":[157],"namely,":[158],"Reuters-21":[159],"Web-KB.":[161],"obtained":[163],"results":[164],"confirm":[165],"that":[166],"proposed":[168],"set":[169],"theory-based":[170],"(STB-SM),":[173],"as":[174],"pre-eminent":[176],"measure,":[177],"outweighs":[178],"state-of-art":[180],"significantly":[182],"regards":[184],"both":[186]},"counts_by_year":[{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":16},{"year":2021,"cited_by_count":16},{"year":2020,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
