{"id":"https://openalex.org/W2201099741","doi":"https://doi.org/10.1109/bigdata.2015.7363845","title":"Scalable k-NN based text clustering","display_name":"Scalable k-NN based text clustering","publication_year":2015,"publication_date":"2015-10-01","ids":{"openalex":"https://openalex.org/W2201099741","doi":"https://doi.org/10.1109/bigdata.2015.7363845","mag":"2201099741"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata.2015.7363845","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2015.7363845","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://hal.science/hal-01525701","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5045513580","display_name":"Alessandro Lulli","orcid":"https://orcid.org/0000-0003-3881-8900"},"institutions":[{"id":"https://openalex.org/I108290504","display_name":"University of Pisa","ror":"https://ror.org/03ad39j10","country_code":"IT","type":"education","lineage":["https://openalex.org/I108290504"]},{"id":"https://openalex.org/I122991210","display_name":"Istituto di Scienza e Tecnologie dell'Informazione \"Alessandro Faedo\"","ror":"https://ror.org/05kacka20","country_code":"IT","type":"facility","lineage":["https://openalex.org/I122991210","https://openalex.org/I4210155236"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Alessandro Lulli","raw_affiliation_strings":["ISTI, CNR, Pisa, Italy","University of Pisa, Italy"],"affiliations":[{"raw_affiliation_string":"ISTI, CNR, Pisa, Italy","institution_ids":["https://openalex.org/I122991210"]},{"raw_affiliation_string":"University of Pisa, Italy","institution_ids":["https://openalex.org/I108290504"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050650048","display_name":"Thibault Debatty","orcid":"https://orcid.org/0000-0003-2373-566X"},"institutions":[{"id":"https://openalex.org/I150517870","display_name":"Royal Military Academy","ror":"https://ror.org/02vmnye06","country_code":"BE","type":"education","lineage":["https://openalex.org/I150517870"]},{"id":"https://openalex.org/I1902872","display_name":"EURECOM","ror":"https://ror.org/00sse7z02","country_code":"FR","type":"education","lineage":["https://openalex.org/I1902872","https://openalex.org/I205703379"]}],"countries":["BE","FR"],"is_corresponding":false,"raw_author_name":"Thibault Debatty","raw_affiliation_strings":["EURECOM, France","Royal Military Academy, Brussels, Belgium"],"affiliations":[{"raw_affiliation_string":"EURECOM, France","institution_ids":["https://openalex.org/I1902872"]},{"raw_affiliation_string":"Royal Military Academy, Brussels, Belgium","institution_ids":["https://openalex.org/I150517870"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027942683","display_name":"Matteo Dell\u2019Amico","orcid":"https://orcid.org/0000-0003-3152-4993"},"institutions":[{"id":"https://openalex.org/I1308906816","display_name":"NortonLifeLock (United States)","ror":"https://ror.org/0449t3a80","country_code":"US","type":"company","lineage":["https://openalex.org/I1308906816"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Matteo Dell'Amico","raw_affiliation_strings":["Symantec Research Labs"],"affiliations":[{"raw_affiliation_string":"Symantec Research Labs","institution_ids":["https://openalex.org/I1308906816"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017009335","display_name":"Pietro Michiardi","orcid":"https://orcid.org/0000-0003-4675-7677"},"institutions":[{"id":"https://openalex.org/I1902872","display_name":"EURECOM","ror":"https://ror.org/00sse7z02","country_code":"FR","type":"education","lineage":["https://openalex.org/I1902872","https://openalex.org/I205703379"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Pietro Michiardi","raw_affiliation_strings":["EURECOM, France"],"affiliations":[{"raw_affiliation_string":"EURECOM, France","institution_ids":["https://openalex.org/I1902872"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015647058","display_name":"Laura Ricci","orcid":"https://orcid.org/0000-0002-8179-8215"},"institutions":[{"id":"https://openalex.org/I108290504","display_name":"University of Pisa","ror":"https://ror.org/03ad39j10","country_code":"IT","type":"education","lineage":["https://openalex.org/I108290504"]},{"id":"https://openalex.org/I122991210","display_name":"Istituto di Scienza e Tecnologie dell'Informazione \"Alessandro Faedo\"","ror":"https://ror.org/05kacka20","country_code":"IT","type":"facility","lineage":["https://openalex.org/I122991210","https://openalex.org/I4210155236"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Laura Ricci","raw_affiliation_strings":["ISTI, CNR, Pisa, Italy","University of Pisa, Italy"],"affiliations":[{"raw_affiliation_string":"ISTI, CNR, Pisa, Italy","institution_ids":["https://openalex.org/I122991210"]},{"raw_affiliation_string":"University of Pisa, Italy","institution_ids":["https://openalex.org/I108290504"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5045513580"],"corresponding_institution_ids":["https://openalex.org/I108290504","https://openalex.org/I122991210"],"apc_list":null,"apc_paid":null,"fwci":7.4666,"has_fulltext":false,"cited_by_count":17,"citation_normalized_percentile":{"value":0.97043991,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"93","issue":null,"first_page":"958","last_page":"963"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10064","display_name":"Complex Network Analysis Techniques","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7874680757522583},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.6235532760620117},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5487830638885498},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3773002624511719},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.16967269778251648}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7874680757522583},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6235532760620117},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5487830638885498},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3773002624511719},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.16967269778251648}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1109/bigdata.2015.7363845","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2015.7363845","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},{"id":"pmh:oai:HAL:hal-01525701v1","is_oa":true,"landing_page_url":"https://hal.science/hal-01525701","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"2015 IEEE International Conference on Big Data, Oct 2015, Santa Clara, United States. pp.958 - 963, &#x27E8;10.1109/BigData.2015.7363845&#x27E9;","raw_type":"Conference papers"},{"id":"pmh:oai:arpi.unipi.it:11568/766224","is_oa":false,"landing_page_url":"http://hdl.handle.net/11568/766224","pdf_url":null,"source":{"id":"https://openalex.org/S4377196265","display_name":"CINECA IRIS Institutial research information system (University of Pisa)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I108290504","host_organization_name":"University of Pisa","host_organization_lineage":["https://openalex.org/I108290504"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/conferenceObject"},{"id":"pmh:oai:dnet:people______::b56868ea72505ad48d6cbf6c76e32408","is_oa":true,"landing_page_url":"https://openportal.isti.cnr.it/doc?id=people______::b56868ea72505ad48d6cbf6c76e32408","pdf_url":null,"source":{"id":"https://openalex.org/S7407055261","display_name":"ISTI Open Portal","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Big Data 2015 - IEEE International Conference on Big Data, pp. 958\u2013963, Santa Clara, CA, USA, 29 October - 01 November 2015","raw_type":"Conference article"},{"id":"pmh:oai:iris.unige.it:11567/1071002","is_oa":false,"landing_page_url":"https://hdl.handle.net/11567/1071002","pdf_url":null,"source":{"id":"https://openalex.org/S4377196291","display_name":"CINECA IRIS Institutial Research Information System (University of Genoa)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I83816512","host_organization_name":"University of Genoa","host_organization_lineage":["https://openalex.org/I83816512"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":{"id":"pmh:oai:HAL:hal-01525701v1","is_oa":true,"landing_page_url":"https://hal.science/hal-01525701","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"2015 IEEE International Conference on Big Data, Oct 2015, Santa Clara, United States. pp.958 - 963, &#x27E8;10.1109/BigData.2015.7363845&#x27E9;","raw_type":"Conference papers"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W46452414","https://openalex.org/W206566442","https://openalex.org/W1534625513","https://openalex.org/W1593715786","https://openalex.org/W1651093245","https://openalex.org/W1736726159","https://openalex.org/W1958077162","https://openalex.org/W1971680255","https://openalex.org/W1987971958","https://openalex.org/W1992654648","https://openalex.org/W2011089773","https://openalex.org/W2015953751","https://openalex.org/W2030951871","https://openalex.org/W2031944888","https://openalex.org/W2053061798","https://openalex.org/W2081980673","https://openalex.org/W2087962968","https://openalex.org/W2088011174","https://openalex.org/W2096141509","https://openalex.org/W2110026675","https://openalex.org/W2113586398","https://openalex.org/W2127048411","https://openalex.org/W2128600649","https://openalex.org/W2130540277","https://openalex.org/W2150593711","https://openalex.org/W2153579005","https://openalex.org/W2158018156","https://openalex.org/W2163972006","https://openalex.org/W2168018439","https://openalex.org/W2291321158","https://openalex.org/W2949166564","https://openalex.org/W3102641634","https://openalex.org/W4236122429","https://openalex.org/W4294170691","https://openalex.org/W6601892241","https://openalex.org/W6632081347","https://openalex.org/W6635622067","https://openalex.org/W6637231022","https://openalex.org/W6643319821","https://openalex.org/W6679406034","https://openalex.org/W6682691769","https://openalex.org/W6684624762"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2389214306","https://openalex.org/W4396696052","https://openalex.org/W4402327032"],"abstract_inverted_index":{"Clustering":[0],"items":[1],"using":[2],"textual":[3],"features":[4],"is":[5,59,70,112],"an":[6,54,86],"important":[7],"problem":[8],"with":[9],"many":[10],"applications,":[11],"such":[12,34],"as":[13,19,21],"root-cause":[14],"analysis":[15],"of":[16,33,101],"spam":[17],"campaigns,":[18],"well":[20],"identifying":[22],"common":[23],"topics":[24],"in":[25],"social":[26],"media.":[27],"Due":[28],"to":[29,62,71,106,119],"the":[30,73],"sheer":[31],"size":[32],"data,":[35],"algorithmic":[36],"scalability":[37,74],"becomes":[38],"a":[39],"major":[40],"concern.":[41],"In":[42],"this":[43],"work,":[44],"we":[45,82,91],"present":[46],"our":[47,80],"approach":[48],"for":[49],"text":[50],"clustering":[51],"that":[52,78,97],"builds":[53],"approximate":[55],"k-NN":[56,102],"graph,":[57],"which":[58],"then":[60],"used":[61],"compute":[63],"connected":[64],"components":[65],"representing":[66],"clusters.":[67,109],"Our":[68,110],"focus":[69],"understand":[72],"/":[75],"accuracy":[76],"tradeoff":[77],"underlies":[79],"method:":[81],"do":[83],"so":[84],"through":[85],"extensive":[87],"experimental":[88],"campaign,":[89],"where":[90],"use":[92],"real-life":[93],"datasets,":[94],"and":[95,114],"show":[96],"even":[98],"rough":[99],"approximations":[100],"graphs":[103],"are":[104],"sufficient":[105],"identify":[107],"valid":[108],"method":[111],"scalable":[113],"can":[115],"be":[116],"easily":[117],"tuned":[118],"meet":[120],"requirements":[121],"stemming":[122],"from":[123],"different":[124],"application":[125],"domains.":[126]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":5},{"year":2018,"cited_by_count":4},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":4}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
