{"id":"https://openalex.org/W4413411966","doi":"https://doi.org/10.1145/3721145.3725746","title":"CB-SpMV: A Data Aggregating and Balance Algorithm for Cache-Friendly Block-Based SpMV on GPUs","display_name":"CB-SpMV: A Data Aggregating and Balance Algorithm for Cache-Friendly Block-Based SpMV on GPUs","publication_year":2025,"publication_date":"2025-06-08","ids":{"openalex":"https://openalex.org/W4413411966","doi":"https://doi.org/10.1145/3721145.3725746"},"language":"en","primary_location":{"id":"doi:10.1145/3721145.3725746","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3721145.3725746","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 39th ACM International Conference on Supercomputing","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2605.18515","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5119381457","display_name":"Xing Cong","orcid":null},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xing Cong","raw_affiliation_strings":["Beihang University, BeiJing, China"],"raw_orcid":"https://orcid.org/0009-0007-0405-5008","affiliations":[{"raw_affiliation_string":"Beihang University, BeiJing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":null,"display_name":"FuKai Sun","orcid":"https://orcid.org/0009-0008-4067-1219"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang 
University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"FuKai Sun","raw_affiliation_strings":["Beihang University, BeiJing, China"],"raw_orcid":"https://orcid.org/0009-0008-4067-1219","affiliations":[{"raw_affiliation_string":"Beihang University, BeiJing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080920527","display_name":"Y.X. Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"YiFan Chen","raw_affiliation_strings":["Beihang University, BeiJing, China"],"raw_orcid":"https://orcid.org/0009-0006-3065-9035","affiliations":[{"raw_affiliation_string":"Beihang University, BeiJing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024508481","display_name":"Chenhao Xie","orcid":"https://orcid.org/0000-0002-1399-0352"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chenhao Xie","raw_affiliation_strings":["Beihang University, BeiJing, China"],"raw_orcid":"https://orcid.org/0000-0002-1399-0352","affiliations":[{"raw_affiliation_string":"Beihang University, BeiJing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100330624","display_name":"Yi 
Liu","orcid":"https://orcid.org/0000-0003-1829-2817"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Liu","raw_affiliation_strings":["Beihang University, BeiJing, China"],"raw_orcid":"https://orcid.org/0000-0003-1829-2817","affiliations":[{"raw_affiliation_string":"Beihang University, BeiJing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079362609","display_name":"Depei Qian","orcid":"https://orcid.org/0000-0002-5382-1473"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Depei Qian","raw_affiliation_strings":["Beihang University, BeiJing, China"],"raw_orcid":"https://orcid.org/0000-0002-5382-1473","affiliations":[{"raw_affiliation_string":"Beihang University, BeiJing, China","institution_ids":["https://openalex.org/I82880672"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.21547347,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"149","last_page":"160"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer 
Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8236684203147888},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5965146422386169},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.5529678463935852},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group 
theory)","score":0.5345646142959595},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.5221205949783325},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08429980278015137}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8236684203147888},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5965146422386169},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.5529678463935852},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.5345646142959595},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.5221205949783325},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08429980278015137},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3721145.3725746","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3721145.3725746","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 39th ACM International Conference on 
Supercomputing","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2605.18515","is_oa":true,"landing_page_url":"https://arxiv.org/abs/2605.18515","pdf_url":"https://arxiv.org/pdf/2605.18515","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2605.18515","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.18515","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell 
University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2605.18515","is_oa":true,"landing_page_url":"https://arxiv.org/abs/2605.18515","pdf_url":"https://arxiv.org/pdf/2605.18515","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3691559314","display_name":null,"funder_award_id":"2023YFB3002902","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of 
China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W1884140786","https://openalex.org/W1993704253","https://openalex.org/W2009654791","https://openalex.org/W2013226093","https://openalex.org/W2035080386","https://openalex.org/W2065607954","https://openalex.org/W2068863300","https://openalex.org/W2080353445","https://openalex.org/W2087507944","https://openalex.org/W2088866486","https://openalex.org/W2101511474","https://openalex.org/W2105672886","https://openalex.org/W2111667319","https://openalex.org/W2126004407","https://openalex.org/W2128853364","https://openalex.org/W2134237243","https://openalex.org/W2242228415","https://openalex.org/W2476506026","https://openalex.org/W2570841452","https://openalex.org/W2594491068","https://openalex.org/W2617403939","https://openalex.org/W2766501636","https://openalex.org/W2902783593","https://openalex.org/W3022703302","https://openalex.org/W3048232878","https://openalex.org/W3096209535","https://openalex.org/W3106161546","https://openalex.org/W3175189837","https://openalex.org/W3202517391","https://openalex.org/W4220912491","https://openalex.org/W4226207577","https://openalex.org/W4238995617","https://openalex.org/W4247712932","https://openalex.org/W4247828381","https://openalex.org/W4316116762","https://openalex.org/W4316252382","https://openalex.org/W4318969866","https://openalex.org/W4321636675","https://openalex.org/W4383749629","https://openalex.org/W4384705388","https://openalex.org/W4385623033","https://openalex.org/W4388661983","https://openalex.org/W4388855631","https://openalex.org/W4393186017","https://openalex.org/W4395106472","https://openalex.org/W4401408866","https://openalex.org/W4402196491"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W4399354997","https://openalex.org/W3062287","https://openalex.org/W2005148
983","https://openalex.org/W2012954338","https://openalex.org/W2096672917","https://openalex.org/W2392023973","https://openalex.org/W2029210135"],"abstract_inverted_index":{"Sparse":[0],"matrix-vector":[1],"multiplication":[2],"(SpMV)":[3],"is":[4,64,114,166],"crucial":[5],"in":[6,62],"computational":[7],"science,":[8],"engineering,":[9],"and":[10,32,55,92,103,141,155,160],"machine":[11],"learning.":[12],"Despite":[13],"substantial":[14],"efforts":[15],"to":[16,27,100,105,116,147],"improve":[17],"SpMV":[18,47],"performance":[19],"on":[20,126,157],"GPUs":[21],"through":[22],"various":[23],"techniques,":[24],"issues":[25],"related":[26],"data":[28,53,77,81],"locality,":[29],"hardware":[30,85],"utilization,":[31,86],"load":[33],"balancing":[34],"persist,":[35],"leaving":[36],"room":[37],"for":[38,78],"further":[39],"optimization.":[40],"This":[41],"paper":[42],"presents":[43],"CB-SpMV,":[44],"a":[45,51,87],"cache-friendly":[46],"optimization":[48],"algorithm,":[49],"using":[50],"novel":[52],"convergent":[54],"adaptable":[56],"2D":[57],"blocking":[58],"structure.":[59],"The":[60,164],"matrix":[61],"CB-SpMV":[63,135],"divided":[65],"into":[66],"independent":[67],"sub-blocks,":[68],"with":[69],"virtual":[70],"pointers":[71],"aggregating":[72],"different":[73],"types":[74],"of":[75,95,145],"intra-block":[76],"better":[79],"cache-level":[80],"locality.":[82],"To":[83],"enhance":[84],"block-aware":[88],"column":[89],"aggregation":[90],"strategy":[91],"the":[93,130],"selection":[94],"sub-block":[96],"formats":[97],"are":[98],"proposed":[99],"accelerate":[101],"computation":[102],"adapt":[104],"varying":[106],"sparse":[107],"matrices.":[108],"Finally,":[109],"an":[110],"inter-block":[111],"load-balancing":[112],"algorithm":[113],"designed":[115],"ensure":[117],"efficient":[118],"workload":[119],"distribution":[120],"across":[121],"thread":[122],"blocks.":[123],"Experimental":[124],"evaluations":[125],"2,843":[127],"matrices":[128],"from":[129],"SuiteSparse":[131],"Collection":
[132],"show":[133],"that":[134],"significantly":[136],"improves":[137],"cache":[138],"hit":[139],"rates":[140],"achieves":[142],"average":[143],"speedups":[144],"up":[146],"3.95x":[148],"over":[149],"state-of-the-art":[150],"methods":[151],"like":[152],"cuSPARSE-BSR,":[153],"TileSpMV,":[154],"DASP":[156],"NVIDIA":[158],"A100":[159],"RTX":[161],"4090":[162],"GPUs.":[163],"implementation":[165],"available":[167],"at:":[168],"https://github.com/xing-cong/CB-Sparse.":[169]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
