{"id":"https://openalex.org/W2780862131","doi":"https://doi.org/10.1145/3148011.3148019","title":"Efficient Clustering from Distributions over Topics","display_name":"Efficient Clustering from Distributions over Topics","publication_year":2017,"publication_date":"2017-12-04","ids":{"openalex":"https://openalex.org/W2780862131","doi":"https://doi.org/10.1145/3148011.3148019","mag":"2780862131"},"language":"en","primary_location":{"id":"doi:10.1145/3148011.3148019","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3148011.3148019","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Knowledge Capture Conference","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2012.08206","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016129344","display_name":"Carlos Badenes-Olmedo","orcid":"https://orcid.org/0000-0002-2753-9917"},"institutions":[{"id":"https://openalex.org/I88060688","display_name":"Universidad Polit\u00e9cnica de Madrid","ror":"https://ror.org/03n6nwv02","country_code":"ES","type":"education","lineage":["https://openalex.org/I88060688"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Carlos Badenes-Olmedo","raw_affiliation_strings":["Ontology Engineering Group, Universidad Polit\u00e9cnica de Madrid, Boadilla del Monte, Spain"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ontology Engineering Group, Universidad Polit\u00e9cnica de Madrid, Boadilla del Monte, Spain","institution_ids":["https://openalex.org/I88060688"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013606816","display_name":"Jos\u00e9 Luis Redondo-Garc\u00eda","orcid":"https://orcid.org/0000-0002-7413-447X"},"institutions":[{"id":"https://openalex.org/I4210123934","display_name":"Amazon (United Kingdom)","ror":"https://ror.org/02xey9634","country_code":"GB","type":"company","lineage":["https://openalex.org/I1311688040","https://openalex.org/I4210123934"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jos\u00e9 Luis Redondo-Garc\u00eda","raw_affiliation_strings":["Amazon Research, Cambridge, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Amazon Research, Cambridge, UK","institution_ids":["https://openalex.org/I4210123934"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082377531","display_name":"\u00d3scar Corcho","orcid":"https://orcid.org/0000-0002-9260-0753"},"institutions":[{"id":"https://openalex.org/I88060688","display_name":"Universidad Polit\u00e9cnica de Madrid","ror":"https://ror.org/03n6nwv02","country_code":"ES","type":"education","lineage":["https://openalex.org/I88060688"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Oscar Corcho","raw_affiliation_strings":["Ontology Engineering Group, Universidad Polit\u00e9cnica de Madrid, Boadilla del Monte, Spain"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ontology Engineering Group, Universidad Polit\u00e9cnica de Madrid, Boadilla del Monte, Spain","institution_ids":["https://openalex.org/I88060688"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5016129344"],"corresponding_institution_ids":["https://openalex.org/I88060688"],"apc_list":null,"apc_paid":null,"fwci":0.8675,"has_fulltext":true,"cited_by_count":9,"citation_normalized_percentile":{"value":0.74072922,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"3","issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10064","display_name":"Complex Network Analysis Techniques","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10064","display_name":"Complex Network Analysis Techniques","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10637","display_name":"Advanced Clustering Algorithms Research","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7944387197494507},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.7591889500617981},{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.7474863529205322},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5942592620849609},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5334810018539429},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4951852858066559},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4941156208515167},{"id":"https://openalex.org/keywords/topic-model","display_name":"Topic model","score":0.4869707524776459},{"id":"https://openalex.org/keywords/document-clustering","display_name":"Document clustering","score":0.4388159513473511},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.4288322329521179},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3133736252784729},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.16426479816436768},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10046613216400146}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7944387197494507},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.7591889500617981},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.7474863529205322},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5942592620849609},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5334810018539429},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4951852858066559},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4941156208515167},{"id":"https://openalex.org/C171686336","wikidata":"https://www.wikidata.org/wiki/Q3532085","display_name":"Topic model","level":2,"score":0.4869707524776459},{"id":"https://openalex.org/C177937566","wikidata":"https://www.wikidata.org/wiki/Q4223102","display_name":"Document clustering","level":3,"score":0.4388159513473511},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.4288322329521179},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3133736252784729},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.16426479816436768},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10046613216400146},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3148011.3148019","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3148011.3148019","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Knowledge Capture Conference","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2012.08206","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2012.08206","pdf_url":"https://arxiv.org/pdf/2012.08206","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:oa.upm.es:52009","is_oa":true,"landing_page_url":"https://oa.upm.es/52009/","pdf_url":"https://oa.upm.es/52009/1/efficient-clustering-distributions.pdf","source":{"id":"https://openalex.org/S4377196323","display_name":"UPM Digital Archive (Technical University of Madrid)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I88060688","host_organization_name":"Universidad Polit\u00e9cnica de Madrid","host_organization_lineage":["https://openalex.org/I88060688"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Proceedings of the Knowledge Capture Conference on - K-CAP 2017 | Knowledge Capture Conference (K-CAP 2017) | 04-06 Dec 2017 | Austin, Texas, United States","raw_type":"info:eu-repo/semantics/acceptedVersion"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2012.08206","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2012.08206","pdf_url":"https://arxiv.org/pdf/2012.08206","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.5199999809265137}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W1508001288","https://openalex.org/W1516184288","https://openalex.org/W1593239840","https://openalex.org/W1632953821","https://openalex.org/W1673310716","https://openalex.org/W1877481539","https://openalex.org/W1880262756","https://openalex.org/W1979044015","https://openalex.org/W1979936637","https://openalex.org/W2001932471","https://openalex.org/W2020842694","https://openalex.org/W2028742638","https://openalex.org/W2035003780","https://openalex.org/W2038228855","https://openalex.org/W2081534675","https://openalex.org/W2098126593","https://openalex.org/W2101903972","https://openalex.org/W2108399535","https://openalex.org/W2121940249","https://openalex.org/W2128925311","https://openalex.org/W2129066856","https://openalex.org/W2134731454","https://openalex.org/W2146950091","https://openalex.org/W2147152072","https://openalex.org/W2159835345","https://openalex.org/W2166354010","https://openalex.org/W2174706414","https://openalex.org/W2278264611","https://openalex.org/W2334889010","https://openalex.org/W2401253355","https://openalex.org/W2499811908","https://openalex.org/W2527655278","https://openalex.org/W2562836854","https://openalex.org/W2751976275","https://openalex.org/W2914365896","https://openalex.org/W2942992487","https://openalex.org/W2952098845","https://openalex.org/W3013669377","https://openalex.org/W3099640513","https://openalex.org/W4237791300","https://openalex.org/W4300009529"],"related_works":["https://openalex.org/W2019737068","https://openalex.org/W2899601636","https://openalex.org/W4254379378","https://openalex.org/W3015674157","https://openalex.org/W4206655101","https://openalex.org/W4237592971","https://openalex.org/W4309228610","https://openalex.org/W4248157169","https://openalex.org/W2387982377","https://openalex.org/W2002320519"],"abstract_inverted_index":{"There":[0],"are":[1,49,79],"many":[2],"scenarios":[3],"where":[4,139],"we":[5,110],"may":[6],"want":[7],"to":[8,41,88,133,151],"find":[9],"pairs":[10,93],"of":[11,56,92,98,119,137,162,172,200],"textually":[12],"similar":[13,76,157],"documents":[14,126,138,158],"in":[15,65,86,103,127,159,198],"a":[16,20,120,128,131],"large":[17],"corpus":[18,59],"(e.g.":[19],"researcher":[21],"doing":[22],"literature":[23,67],"review,":[24],"or":[25],"an":[26,112],"R&D":[27],"project":[28,31],"manager":[29],"analyzing":[30],"proposals).":[32],"To":[33],"programmatically":[34],"discover":[35],"those":[36,43],"connections":[37],"can":[38,143],"help":[39],"experts":[40],"achieve":[42],"goals,":[44],"but":[45],"brute-force":[46],"pairwise":[47],"comparisons":[48],"not":[50],"computationally":[51],"adequate":[52],"when":[53,155],"the":[54,57,66,69,84,90,117,125,140,160,173,182,194],"size":[55],"document":[58],"is":[60],"too":[61],"large.":[62],"Some":[63],"algorithms":[64],"divide":[68],"search":[70],"space":[71],"into":[72],"regions":[73],"containing":[74],"potentially":[75],"documents,":[77],"which":[78],"later":[80],"processed":[81],"separately":[82],"from":[83],"rest":[85],"order":[87],"reduce":[89],"number":[91],"compared.":[94],"However,":[95],"this":[96,108],"kind":[97],"unsupervised":[99],"methods":[100],"still":[101],"incur":[102],"high":[104],"temporal":[105],"costs.":[106],"In":[107],"paper,":[109],"present":[111],"approach":[113,148,169,190],"that":[114,188],"relies":[115],"on":[116],"results":[118,154],"topic":[121,183],"modeling":[122,184],"algorithm":[123],"over":[124],"collection,":[129],"as":[130],"means":[132],"identify":[134],"smaller":[135],"subsets":[136],"similarity":[141],"function":[142],"then":[144],"be":[145],"computed.":[146],"This":[147],"has":[149],"proved":[150],"obtain":[152],"promising":[153],"identifying":[156],"domain":[161],"scientific":[163],"publications.":[164],"We":[165],"have":[166],"compared":[167],"our":[168,189],"against":[170],"state":[171],"art":[174],"clustering":[175],"techniques":[176,197],"and":[177],"with":[178],"different":[179],"configurations":[180],"for":[181],"algorithm.":[185],"Results":[186],"suggest":[187],"outperforms":[191],"(>":[192],"0.5)":[193],"other":[195],"analyzed":[196],"terms":[199],"efficiency.":[201]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":1}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
