{"id":"https://openalex.org/W1998894100","doi":"https://doi.org/10.1109/tvlsi.2015.2421287","title":"GPU-Accelerated Parallel Sparse LU Factorization Method for Fast Circuit Analysis","display_name":"GPU-Accelerated Parallel Sparse LU Factorization Method for Fast Circuit Analysis","publication_year":2015,"publication_date":"2015-04-27","ids":{"openalex":"https://openalex.org/W1998894100","doi":"https://doi.org/10.1109/tvlsi.2015.2421287","mag":"1998894100"},"language":"en","primary_location":{"id":"doi:10.1109/tvlsi.2015.2421287","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tvlsi.2015.2421287","pdf_url":null,"source":{"id":"https://openalex.org/S37538908","display_name":"IEEE Transactions on Very Large Scale Integration (VLSI) Systems","issn_l":"1063-8210","issn":["1063-8210","1557-9999"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Very Large Scale Integration (VLSI) Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101407801","display_name":"Kai He","orcid":"https://orcid.org/0000-0003-2639-1532"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Kai He","raw_affiliation_strings":["Department of Electrical Engineering, University of California at Riverside, Riverside, CA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, University of California at Riverside, Riverside, CA, USA","institution_ids":["https://openalex.org/I103635307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058844682","display_name":"Sheldon X.-D. Tan","orcid":"https://orcid.org/0000-0003-2119-6869"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sheldon X. -D. Tan","raw_affiliation_strings":["Department of Electrical Engineering, University of California at Riverside, Riverside, CA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, University of California at Riverside, Riverside, CA, USA","institution_ids":["https://openalex.org/I103635307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100452499","display_name":"Hai Wang","orcid":"https://orcid.org/0000-0002-4003-2758"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hai Wang","raw_affiliation_strings":["School of Microelectronics and Solid-State Electronics, University of Electronic Science and Technology of China, Chengdu, China","School of Microelectronics & Solid-State Electronics, University of Electronic Science & Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"School of Microelectronics and Solid-State Electronics, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]},{"raw_affiliation_string":"School of Microelectronics & Solid-State Electronics, University of Electronic Science & Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5072810436","display_name":"Guoyong Shi","orcid":"https://orcid.org/0000-0002-8655-3487"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guoyong Shi","raw_affiliation_strings":["School of Microelectronics, Shanghai Jiao Tong University, Shanghai, China","School of Microelectronics, Shanghai Jiao Tong University Shanghai China"],"affiliations":[{"raw_affiliation_string":"School of Microelectronics, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"School of Microelectronics, Shanghai Jiao Tong University Shanghai China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101407801"],"corresponding_institution_ids":["https://openalex.org/I103635307"],"apc_list":null,"apc_paid":null,"fwci":5.9588,"has_fulltext":false,"cited_by_count":71,"citation_normalized_percentile":{"value":0.9670489,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"24","issue":"3","first_page":"1140","last_page":"1150"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10792","display_name":"Matrix Theory and Algorithms","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10792","display_name":"Matrix Theory and Algorithms","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10363","display_name":"Low-power high-performance VLSI design","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.8895793557167053},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.8417307734489441},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8115057349205017},{"id":"https://openalex.org/keywords/solver","display_name":"Solver","score":0.8106562495231628},{"id":"https://openalex.org/keywords/lu-decomposition","display_name":"LU decomposition","score":0.6753513216972351},{"id":"https://openalex.org/keywords/factorization","display_name":"Factorization","score":0.645508885383606},{"id":"https://openalex.org/keywords/incomplete-lu-factorization","display_name":"Incomplete LU factorization","score":0.5280573964118958},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.45990511775016785},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.4454162120819092},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.43013450503349304},{"id":"https://openalex.org/keywords/matrix-decomposition","display_name":"Matrix decomposition","score":0.36637628078460693},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.31182950735092163},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.07380381226539612}],"concepts":[{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.8895793557167053},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.8417307734489441},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8115057349205017},{"id":"https://openalex.org/C2778770139","wikidata":"https://www.wikidata.org/wiki/Q1966904","display_name":"Solver","level":2,"score":0.8106562495231628},{"id":"https://openalex.org/C123213974","wikidata":"https://www.wikidata.org/wiki/Q833089","display_name":"LU decomposition","level":4,"score":0.6753513216972351},{"id":"https://openalex.org/C187834632","wikidata":"https://www.wikidata.org/wiki/Q188804","display_name":"Factorization","level":2,"score":0.645508885383606},{"id":"https://openalex.org/C134978465","wikidata":"https://www.wikidata.org/wiki/Q1654069","display_name":"Incomplete LU factorization","level":4,"score":0.5280573964118958},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.45990511775016785},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.4454162120819092},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.43013450503349304},{"id":"https://openalex.org/C42355184","wikidata":"https://www.wikidata.org/wiki/Q1361088","display_name":"Matrix decomposition","level":3,"score":0.36637628078460693},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.31182950735092163},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.07380381226539612},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tvlsi.2015.2421287","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tvlsi.2015.2421287","pdf_url":null,"source":{"id":"https://openalex.org/S37538908","display_name":"IEEE Transactions on Very Large Scale Integration (VLSI) Systems","issn_l":"1063-8210","issn":["1063-8210","1557-9999"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Very Large Scale Integration (VLSI) Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W1520511539","https://openalex.org/W1762731526","https://openalex.org/W2017431061","https://openalex.org/W2020561008","https://openalex.org/W2022038016","https://openalex.org/W2035080386","https://openalex.org/W2035720033","https://openalex.org/W2041999884","https://openalex.org/W2055094346","https://openalex.org/W2063748721","https://openalex.org/W2067795769","https://openalex.org/W2074093855","https://openalex.org/W2106969188","https://openalex.org/W2118076222","https://openalex.org/W2123097484","https://openalex.org/W2130886743","https://openalex.org/W2132450860","https://openalex.org/W2136834900","https://openalex.org/W2162322364","https://openalex.org/W2168033396","https://openalex.org/W2169150754"],"related_works":["https://openalex.org/W4318969829","https://openalex.org/W2904573237","https://openalex.org/W4289119257","https://openalex.org/W1997196200","https://openalex.org/W3014000538","https://openalex.org/W4391266752","https://openalex.org/W1973739845","https://openalex.org/W4246966070","https://openalex.org/W156354643","https://openalex.org/W210173153"],"abstract_inverted_index":{"Lower":[0],"upper":[1],"(LU)":[2],"factorization":[3,20,77,90,163,174],"for":[4,13,60,92,115],"sparse":[5,55,93,241,321],"matrices":[6,234,257],"is":[7,72,83,112],"the":[8,22,104,108,122,125,129,146,154,161,169,187,198,201,209,214,229,236,248,259,268,271,279,284,291,312,315,319],"most":[9],"important":[10],"computing":[11,46],"step":[12],"circuit":[14,61,116,233],"simulation":[15,62],"problems.":[16],"However,":[17],"parallelizing":[18],"LU":[19,56,76,89,134,151,162,173,226,295,322],"on":[21,58,85,118,164,228,252,300],"graphic":[23],"processing":[24],"units":[25],"(GPUs)":[26],"turns":[27],"out":[28],"to":[29,35],"be":[30,101],"a":[31,53,86,221,253,306],"difficult":[32],"problem":[33],"due":[34],"intrinsic":[36],"data":[37],"dependence":[38],"and":[39,63,140,194,200,217,264,270,287],"irregular":[40],"memory":[41],"access,":[42],"which":[43,71,111,309],"diminish":[44],"GPU":[45,74,81,119,150],"power.":[47],"In":[48,297],"this":[49],"paper,":[50],"we":[51,245],"propose":[52],"new":[54,69,148],"solver":[57,79,152,190,227,251],"GPUs":[59],"more":[64,98,113],"general":[65,256],"scientific":[66],"computing.":[67],"The":[68],"method,":[70,110,135],"called":[73],"accelerated":[75],"(GLU)":[78],"(for":[80],"LU),":[82],"based":[84],"hybrid":[87],"right-looking":[88,105],"algorithm":[91],"matrices.":[94],"We":[95,143],"show":[96,144,185],"that":[97,145,186],"concurrency":[99],"can":[100,176,191],"exploited":[102],"in":[103,160,167],"method":[106,317],"than":[107],"left-looking":[109,133,172,225,294],"popular":[114],"analysis,":[117],"platforms.":[120],"At":[121],"same":[123,292],"time,":[124],"GLU":[126,189,250,261],"also":[127,246],"preserves":[128],"benefit":[130],"of":[131,156,180,231,238,255,314],"column-based":[132],"such":[136],"as":[137],"symbolic":[138],"analysis":[139],"columnlevel":[141],"concurrency.":[142],"resulting":[147],"parallel":[149],"allows":[153],"parallelization":[155,179],"all":[157],"three":[158],"loops":[159],"GPUs.":[165],"While":[166],"contrast,":[168],"existing":[170,320],"GPU-based":[171,224,293],"approach":[175],"only":[177],"allow":[178],"two":[181],"loops.":[182],"Experimental":[183],"results":[184],"proposed":[188,223,249,316],"deliver":[192],"5.71\u03c7":[193],"1.46x":[195],"speedup":[196,207,219,266,277,289],"over":[197,208,213,220,267,278,283,290,318],"single-threaded":[199],"16-threaded":[202,272],"PARDISO":[203,273],"solvers,":[204,274],"respectively,":[205,275],"19.56x":[206],"KLU":[210,280],"solver,":[211,216,281,286],"47.13x":[212],"UMFPACK":[215,285],"1.47x":[218],"recently":[222],"set":[230,254],"typical":[232],"from":[235,258],"University":[237],"Florida":[239],"(UFL)":[240],"matrix":[242],"collection.":[243],"Furthermore,":[244],"compare":[247],"UFL,":[260],"achieves":[262],"6.38x":[263],"1.12x":[265],"singlethreaded":[269],"39.39x":[276],"24.04x":[282],"2.35x":[288],"solver.":[296],"addition,":[298],"comparison":[299],"self-generated":[301],"RLC":[302],"mesh":[303],"networks":[304],"shows":[305],"similar":[307],"trend,":[308],"further":[310],"validates":[311],"advantage":[313],"solvers.":[323]},"counts_by_year":[{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":9},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":9},{"year":2020,"cited_by_count":8},{"year":2019,"cited_by_count":8},{"year":2018,"cited_by_count":6},{"year":2017,"cited_by_count":6},{"year":2016,"cited_by_count":3},{"year":2015,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
