{"id":"https://openalex.org/W3048330007","doi":"https://doi.org/10.1145/3404397.3404400","title":"CapelliniSpTRSV: A Thread-Level Synchronization-Free Sparse Triangular Solve on GPUs","display_name":"CapelliniSpTRSV: A Thread-Level Synchronization-Free Sparse Triangular Solve on GPUs","publication_year":2020,"publication_date":"2020-08-09","ids":{"openalex":"https://openalex.org/W3048330007","doi":"https://doi.org/10.1145/3404397.3404400","mag":"3048330007"},"language":"en","primary_location":{"id":"doi:10.1145/3404397.3404400","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3404397.3404400","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"49th International Conference on Parallel Processing - ICPP","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025797638","display_name":"Jiya Su","orcid":"https://orcid.org/0000-0002-4952-1486"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jiya Su","raw_affiliation_strings":["Renmin University of China, China"],"affiliations":[{"raw_affiliation_string":"Renmin University of China, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091139467","display_name":"Feng Zhang","orcid":"https://orcid.org/0000-0003-1983-7321"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Feng Zhang","raw_affiliation_strings":["Renmin University of China"],"affiliations":[{"raw_affiliation_string":"Renmin University of China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100444152","display_name":"Weifeng Liu","orcid":"https://orcid.org/0000-0002-2150-5759"},"institutions":[{"id":"https://openalex.org/I204553293","display_name":"China University of Petroleum, Beijing","ror":"https://ror.org/041qf4r12","country_code":"CN","type":"education","lineage":["https://openalex.org/I204553293"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weifeng Liu","raw_affiliation_strings":["China University of Petroleum"],"affiliations":[{"raw_affiliation_string":"China University of Petroleum","institution_ids":["https://openalex.org/I204553293"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039946576","display_name":"Bingsheng He","orcid":"https://orcid.org/0000-0001-8618-4581"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Bingsheng He","raw_affiliation_strings":["National University of Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076272801","display_name":"Ruofan Wu","orcid":"https://orcid.org/0000-0001-6826-8108"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruofan Wu","raw_affiliation_strings":["Renmin University of China"],"affiliations":[{"raw_affiliation_string":"Renmin University of China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008721449","display_name":"Xiaoyong Du","orcid":"https://orcid.org/0000-0002-5757-9135"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoyong Du","raw_affiliation_strings":["Renmin University of China"],"affiliations":[{"raw_affiliation_string":"Renmin University of China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5073855975","display_name":"Rujia Wang","orcid":"https://orcid.org/0000-0003-4019-5327"},"institutions":[{"id":"https://openalex.org/I180949307","display_name":"Illinois Institute of Technology","ror":"https://ror.org/037t3ry66","country_code":"US","type":"education","lineage":["https://openalex.org/I180949307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rujia Wang","raw_affiliation_strings":["Illinois Institute of Technology"],"affiliations":[{"raw_affiliation_string":"Illinois Institute of Technology","institution_ids":["https://openalex.org/I180949307"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5025797638"],"corresponding_institution_ids":["https://openalex.org/I78988378"],"apc_list":null,"apc_paid":null,"fwci":1.8483,"has_fulltext":false,"cited_by_count":21,"citation_normalized_percentile":{"value":0.85380372,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"11"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10792","display_name":"Matrix Theory and Algorithms","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.8144983053207397},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7794874310493469},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7440736293792725},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.7217280864715576},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.6816406846046448},{"id":"https://openalex.org/keywords/synchronization","display_name":"Synchronization (alternating current)","score":0.5442301630973816},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.5075768232345581},{"id":"https://openalex.org/keywords/flops","display_name":"FLOPS","score":0.4388483762741089},{"id":"https://openalex.org/keywords/sparse-array","display_name":"Sparse array","score":0.42906302213668823},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4122544527053833},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.09571114182472229}],"concepts":[{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.8144983053207397},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7794874310493469},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7440736293792725},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.7217280864715576},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.6816406846046448},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.5442301630973816},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.5075768232345581},{"id":"https://openalex.org/C3826847","wikidata":"https://www.wikidata.org/wiki/Q188768","display_name":"FLOPS","level":2,"score":0.4388483762741089},{"id":"https://openalex.org/C145177509","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse array","level":2,"score":0.42906302213668823},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4122544527053833},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.09571114182472229},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3404397.3404400","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3404397.3404400","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"49th International Conference on Parallel Processing - ICPP","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":49,"referenced_works":["https://openalex.org/W993511226","https://openalex.org/W1506342804","https://openalex.org/W1509684299","https://openalex.org/W1527104720","https://openalex.org/W1965034778","https://openalex.org/W1982358758","https://openalex.org/W2002160161","https://openalex.org/W2003840611","https://openalex.org/W2009654791","https://openalex.org/W2010315317","https://openalex.org/W2026517532","https://openalex.org/W2035080386","https://openalex.org/W2050710108","https://openalex.org/W2052602889","https://openalex.org/W2072680607","https://openalex.org/W2080090223","https://openalex.org/W2080815425","https://openalex.org/W2088866486","https://openalex.org/W2091883426","https://openalex.org/W2097717378","https://openalex.org/W2098903349","https://openalex.org/W2115052535","https://openalex.org/W2122817518","https://openalex.org/W2130617051","https://openalex.org/W2150011671","https://openalex.org/W2154111453","https://openalex.org/W2155551886","https://openalex.org/W2155751237","https://openalex.org/W2168931017","https://openalex.org/W2181846018","https://openalex.org/W2263374743","https://openalex.org/W2276719940","https://openalex.org/W2406344247","https://openalex.org/W2411480360","https://openalex.org/W2469975815","https://openalex.org/W2524532592","https://openalex.org/W2626696598","https://openalex.org/W2789228469","https://openalex.org/W2805150752","https://openalex.org/W2887223333","https://openalex.org/W2934889147","https://openalex.org/W2949783914","https://openalex.org/W2967987264","https://openalex.org/W3048232878","https://openalex.org/W3098059246","https://openalex.org/W3141344572","https://openalex.org/W4206109200","https://openalex.org/W4232836277","https://openalex.org/W4302564868"],"related_works":["https://openalex.org/W2058965144","https://openalex.org/W2164382479","https://openalex.org/W2891818448","https://openalex.org/W2117946168","https://openalex.org/W2051410394","https://openalex.org/W2007259720","https://openalex.org/W4250086616","https://openalex.org/W2479967241","https://openalex.org/W2028515057","https://openalex.org/W2137430301"],"abstract_inverted_index":{"Sparse":[0,211],"triangular":[1],"solves":[2],"(SpTRSVs)":[3],"have":[4,17,111],"been":[5,18],"extensively":[6],"used":[7],"in":[8,104,243,248],"linear":[9],"algebra":[10],"fields,":[11],"and":[12,28,72,107,192,218,237],"many":[13],"GPU-based":[14],"SpTRSV":[15,36,46,127,223,235,242],"algorithms":[16,47],"proposed.":[19],"Synchronization-free":[20],"SpTRSVs,":[21],"due":[22],"to":[23,100,145,198],"their":[24],"short":[25],"preprocessing":[26,144],"time":[27],"high":[29,71,151],"performance,":[30],"are":[31],"currently":[32],"the":[33,42,63,73,98,115,137,181,209,232,241],"most":[34,182],"popular":[35,183],"algorithms.":[37],"However,":[38],"we":[39,121],"observe":[40],"that":[41,61,155,221],"performance":[43,152,179],"of":[44,66,76,114],"those":[45,83],"on":[48,97,153,167,180,214],"different":[49],"matrices":[50,154,207],"can":[51,175],"vary":[52],"greatly":[53],"by":[54],"845":[55],"times.":[56],"Our":[57],"further":[58],"studies":[59],"show":[60,220],"when":[62],"average":[64,74],"number":[65,75],"components":[67],"per":[68,79],"level":[69],"is":[70,81,91,228,246],"nonzero":[77],"elements":[78],"row":[80,103,189],"low,":[82],"SpTRSVs":[84,157],"exhibit":[85],"extremely":[86],"low":[87],"performance.":[88],"The":[89],"reason":[90],"that,":[92],"they":[93],"use":[94],"a":[95,102,124],"warp":[96],"GPU":[99,216],"process":[101],"sparse":[105,169,184,188],"matrices,":[106],"such":[108],"warp-level":[109],"designs":[110],"severe":[112],"underutilization":[113],"GPU.":[116],"To":[117],"solve":[118],"this":[119],"problem,":[120],"propose":[122],"CapelliniSpTRSV,":[123],"thread-level":[125],"synchronization-free":[126,234],"algorithm.":[128],"Particularly,":[129],"CapelliniSpTRSV":[130,140,149,204,245],"has":[131],"three":[132,215],"novel":[133],"features.":[134],"First,":[135],"unlike":[136],"previous":[138,156],"studies,":[139],"does":[141,164],"not":[142,165,196],"need":[143,197],"calculate":[146],"levels.":[147],"Second,":[148],"exhibits":[150,224],"cannot":[158],"handle":[159],"efficiently.":[160],"Third,":[161],"CapelliniSpTRSV\u2019s":[162],"optimization":[163],"rely":[166],"specific":[168],"matrix":[170,185],"storage":[171],"format.":[172],"Instead,":[173],"it":[174],"achieve":[176],"very":[177],"good":[178],"storage,":[186],"compressed":[187],"(CSR)":[190],"format,":[191],"thus":[193],"users":[194],"do":[195],"conduct":[199],"format":[200],"conversion.":[201],"We":[202],"evaluate":[203],"with":[205],"245":[206],"from":[208],"Florida":[210],"Matrix":[212],"Collection":[213],"platforms,":[217],"experiments":[219],"our":[222],"6.84":[225],"GFLOPS/s,":[226],"which":[227],"4.97x":[229],"speedup":[230,239],"over":[231,240],"state-of-the-art":[233],"algorithm,":[236],"4.74x":[238],"cuSPARSE.":[244],"open-sourced":[247],"https://github.com/JiyaSu/CapelliniSpTRSV.":[249]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":3}],"updated_date":"2026-02-25T08:12:03.925757","created_date":"2025-10-10T00:00:00"}
