{"id":"https://openalex.org/W4391093100","doi":"https://doi.org/10.1109/bigdata59044.2023.10386645","title":"KF K-means: A High Performance K-means Implementation using Kernel Fusion","display_name":"KF K-means: A High Performance K-means Implementation using Kernel Fusion","publication_year":2023,"publication_date":"2023-12-15","ids":{"openalex":"https://openalex.org/W4391093100","doi":"https://doi.org/10.1109/bigdata59044.2023.10386645"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata59044.2023.10386645","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/bigdata59044.2023.10386645","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5051694932","display_name":"Kaiming Ouyang","orcid":"https://orcid.org/0000-0002-4775-1835"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Kaiming Ouyang","raw_affiliation_strings":["NVIDIA Corporation,Santa Clara,CA,USA","NVIDIA Corporation, Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"NVIDIA Corporation,Santa Clara,CA,USA","institution_ids":["https://openalex.org/I4210127875"]},{"raw_affiliation_string":"NVIDIA Corporation, Santa Clara, CA, USA","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049947637","display_name":"Vincent Tran","orcid":null},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vincent Tran","raw_affiliation_strings":["University of California,Riverside,CA,USA","University of California, Riverside, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of California,Riverside,CA,USA","institution_ids":["https://openalex.org/I103635307"]},{"raw_affiliation_string":"University of California, Riverside, CA, USA","institution_ids":["https://openalex.org/I103635307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100705519","display_name":"Jinyang Liu","orcid":"https://orcid.org/0000-0003-0177-502X"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jinyang Liu","raw_affiliation_strings":["University of California,Riverside,CA,USA","University of California, Riverside, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of California,Riverside,CA,USA","institution_ids":["https://openalex.org/I103635307"]},{"raw_affiliation_string":"University of California, Riverside, CA, USA","institution_ids":["https://openalex.org/I103635307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022569738","display_name":"Bryan M. Wong","orcid":"https://orcid.org/0000-0002-3477-8043"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bryan M. Wong","raw_affiliation_strings":["University of California,Riverside,CA,USA","University of California, Riverside, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of California,Riverside,CA,USA","institution_ids":["https://openalex.org/I103635307"]},{"raw_affiliation_string":"University of California, Riverside, CA, USA","institution_ids":["https://openalex.org/I103635307"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5061737717","display_name":"Zizhong Chen","orcid":"https://orcid.org/0000-0003-2578-4940"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zizhong Chen","raw_affiliation_strings":["University of California,Riverside,CA,USA","University of California, Riverside, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of California,Riverside,CA,USA","institution_ids":["https://openalex.org/I103635307"]},{"raw_affiliation_string":"University of California, Riverside, CA, USA","institution_ids":["https://openalex.org/I103635307"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5051694932"],"corresponding_institution_ids":["https://openalex.org/I4210127875"],"apc_list":null,"apc_paid":null,"fwci":0.123,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.45888949,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"21","issue":null,"first_page":"121","last_page":"127"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11321","display_name":"Error Correcting Code Techniques","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7486666440963745},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.7395195960998535},{"id":"https://openalex.org/keywords/subroutine","display_name":"Subroutine","score":0.7221968770027161},{"id":"https://openalex.org/keywords/x86","display_name":"x86","score":0.7066417932510376},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6863967180252075},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.4509708881378174},{"id":"https://openalex.org/keywords/matrix","display_name":"Matrix (chemical analysis)","score":0.4486520290374756},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.41895341873168945},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3990868926048279},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.1974501609802246},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1701415777206421},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.16370287537574768},{"id":"https://openalex.org/keywords/discrete-mathematics","display_name":"Discrete mathematics","score":0.11751455068588257}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7486666440963745},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.7395195960998535},{"id":"https://openalex.org/C96147967","wikidata":"https://www.wikidata.org/wiki/Q190686","display_name":"Subroutine","level":2,"score":0.7221968770027161},{"id":"https://openalex.org/C170723468","wikidata":"https://www.wikidata.org/wiki/Q182933","display_name":"x86","level":3,"score":0.7066417932510376},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6863967180252075},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.4509708881378174},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.4486520290374756},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.41895341873168945},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3990868926048279},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.1974501609802246},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1701415777206421},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.16370287537574768},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.11751455068588257},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C84114770","wikidata":"https://www.wikidata.org/wiki/Q46344","display_name":"Quantum","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata59044.2023.10386645","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/bigdata59044.2023.10386645","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"},{"id":"https://openalex.org/F4320332359","display_name":"Office of Science","ror":"https://ror.org/00mmn6b08"},{"id":"https://openalex.org/F4320337506","display_name":"Advanced Scientific Computing Research","ror":"https://ror.org/0012c7r22"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W118481696","https://openalex.org/W1634005169","https://openalex.org/W1865067283","https://openalex.org/W1967368946","https://openalex.org/W1975827585","https://openalex.org/W1999668761","https://openalex.org/W2049631158","https://openalex.org/W2069903846","https://openalex.org/W2073061372","https://openalex.org/W2100562604","https://openalex.org/W2105733314","https://openalex.org/W2116762767","https://openalex.org/W2141362318","https://openalex.org/W2142827986","https://openalex.org/W2142838865","https://openalex.org/W2146762855","https://openalex.org/W2508318196","https://openalex.org/W2800394774","https://openalex.org/W2913668833","https://openalex.org/W3145543370","https://openalex.org/W4212848460","https://openalex.org/W4236644932","https://openalex.org/W4242408666","https://openalex.org/W4246396312","https://openalex.org/W6635035540","https://openalex.org/W6636657728","https://openalex.org/W6668990524","https://openalex.org/W6674878640","https://openalex.org/W6685529966","https://openalex.org/W6725376809"],"related_works":["https://openalex.org/W2391861012","https://openalex.org/W2488336788","https://openalex.org/W4285469493","https://openalex.org/W1593224248","https://openalex.org/W2375766869","https://openalex.org/W2080384954","https://openalex.org/W4382519933","https://openalex.org/W2387256666","https://openalex.org/W1925544630","https://openalex.org/W2004686618"],"abstract_inverted_index":{"The":[0,69],"K-means":[1,35,67,77,109,125,132,142,153],"algorithm":[2,40,154],"is":[3,111,128,156],"one":[4],"of":[5,95,106],"the":[6,39,45,62,66,81,97,104,107,139,149],"simplest":[7],"and":[8,29,88,101,155],"most":[9],"universal":[10],"clustering":[11],"algorithms.":[12],"Significant":[13],"work":[14],"has":[15,56],"been":[16,57],"carried":[17],"out":[18],"over":[19],"several":[20],"years":[21],"to":[22,59,130,158],"improve":[23],"its":[24,115],"performance":[25,105,164],"in":[26,65,93],"both":[27],"academic":[28],"industrial":[30],"applications.":[31],"Researchers":[32],"have":[33],"optimized":[34],"not":[36],"only":[37],"on":[38,44,80,135,165],"level":[41],"but":[42],"also":[43],"architecture":[46,90,102],"level.":[47],"Notably,":[48],"GEMM,":[49],"a":[50,75,122],"rigorously":[51],"studied":[52],"matrix":[53,141],"multiplication":[54],"operation,":[55],"used":[58],"speed":[60],"up":[61],"Euclidean-distance":[63],"calculations":[64],"algorithm.":[68],"Intel":[70,82,151,166],"DAAL":[71,152],"library":[72],"currently":[73],"provides":[74],"fast":[76],"implementation":[78,110],"based":[79],"Math":[83],"Kernel":[84],"Library":[85],"GEMM":[86,99],"subroutine":[87,100],"low-level":[89],"information.":[91],"However,":[92],"spite":[94],"utilizing":[96],"MKL":[98],"properties,":[103],"state-of-the-art":[108,150],"still":[112],"far":[113],"from":[114],"hardware":[116],"peak":[117],"performance.":[118],"This":[119],"paper":[120],"presents":[121],"faster":[123,147],"fused-matrix":[124],"kernel":[126,143],"that":[127],"superior":[129],"current":[131],"designs.":[133],"Based":[134],"our":[136],"experimental":[137],"results,":[138],"fused":[140],"runs":[144],"around":[145],"76%":[146],"than":[148],"able":[157],"achieve":[159],"nearly":[160],"double":[161],"floating":[162],"point":[163],"x86-84":[167],"Ivy":[168],"micro-architectures.":[169]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-12-21T23:12:01.093139","created_date":"2025-10-10T00:00:00"}
