{"id":"https://openalex.org/W4407115054","doi":"https://doi.org/10.1145/3716171","title":"High Performance Solution of Tridiagonal Systems on the GPU","display_name":"High Performance Solution of Tridiagonal Systems on the GPU","publication_year":2025,"publication_date":"2025-02-04","ids":{"openalex":"https://openalex.org/W4407115054","doi":"https://doi.org/10.1145/3716171"},"language":"en","primary_location":{"id":"doi:10.1145/3716171","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3716171","pdf_url":null,"source":{"id":"https://openalex.org/S2483380313","display_name":"ACM Transactions on Parallel Computing","issn_l":"2329-4949","issn":["2329-4949","2329-4957"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Parallel Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1145/3716171","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5077834411","display_name":"Dmitrii Tolmachev","orcid":"https://orcid.org/0000-0002-5494-7983"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":true,"raw_author_name":"Dmitrii Tolmachev","raw_affiliation_strings":["Institute of Geophysics, ETH Zurich, Zurich, Switzerland"],"raw_orcid":"https://orcid.org/0000-0002-5494-7983","affiliations":[{"raw_affiliation_string":"Institute of Geophysics, ETH Zurich, Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069939126","display_name":"Philippe Marti","orcid":"https://orcid.org/0000-0002-3936-1503"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Philippe Marti","raw_affiliation_strings":["Institute of Geophysics, ETH Zurich, Zurich, Switzerland","Institute of Geophysics, ETH Zurich, Zurich Switzerland"],"raw_orcid":"https://orcid.org/0000-0002-3936-1503","affiliations":[{"raw_affiliation_string":"Institute of Geophysics, ETH Zurich, Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]},{"raw_affiliation_string":"Institute of Geophysics, ETH Zurich, Zurich Switzerland","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002443404","display_name":"Giacomo Castiglioni","orcid":"https://orcid.org/0000-0003-0278-6951"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Giacomo Castiglioni","raw_affiliation_strings":["Institute of Geophysics, ETH Zurich, Zurich, Switzerland","Institute of Geophysics, ETH Zurich, Zurich Switzerland"],"raw_orcid":"https://orcid.org/0000-0003-0278-6951","affiliations":[{"raw_affiliation_string":"Institute of Geophysics, ETH Zurich, Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]},{"raw_affiliation_string":"Institute of Geophysics, ETH Zurich, Zurich Switzerland","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5041297653","display_name":"Andrew Jackson","orcid":"https://orcid.org/0000-0003-1821-4114"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Andrew Jackson","raw_affiliation_strings":["Institute of Geophysics, ETH Zurich, Zurich, Switzerland","Institute of Geophysics, ETH Zurich, Zurich Switzerland"],"raw_orcid":"https://orcid.org/0000-0003-1821-4114","affiliations":[{"raw_affiliation_string":"Institute of Geophysics, ETH Zurich, Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]},{"raw_affiliation_string":"Institute of Geophysics, ETH Zurich, Zurich Switzerland","institution_ids":["https://openalex.org/I35440088"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5077834411"],"corresponding_institution_ids":["https://openalex.org/I35440088"],"apc_list":null,"apc_paid":null,"fwci":1.1789,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.77689994,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":"12","issue":"2","first_page":"1","last_page":"25"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10792","display_name":"Matrix Theory and Algorithms","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10792","display_name":"Matrix Theory and Algorithms","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10739","display_name":"Electromagnetic Scattering and Analysis","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/3107","display_name":"Atomic and Molecular Physics, and Optics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13126","display_name":"Scientific Research and Discoveries","score":0.9731000065803528,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/tridiagonal-matrix","display_name":"Tridiagonal matrix","score":0.8885279893875122},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6080271005630493},{"id":"https://openalex.org/keywords/tridiagonal-matrix-algorithm","display_name":"Tridiagonal matrix algorithm","score":0.5935593247413635},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.575803279876709},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.5046166181564331},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.11537474393844604}],"concepts":[{"id":"https://openalex.org/C51647924","wikidata":"https://www.wikidata.org/wiki/Q1755277","display_name":"Tridiagonal matrix","level":3,"score":0.8885279893875122},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6080271005630493},{"id":"https://openalex.org/C176603272","wikidata":"https://www.wikidata.org/wiki/Q1819156","display_name":"Tridiagonal matrix algorithm","level":4,"score":0.5935593247413635},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.575803279876709},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.5046166181564331},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.11537474393844604},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3716171","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3716171","pdf_url":null,"source":{"id":"https://openalex.org/S2483380313","display_name":"ACM Transactions on Parallel Computing","issn_l":"2329-4949","issn":["2329-4949","2329-4957"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Parallel Computing","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3716171","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3716171","pdf_url":null,"source":{"id":"https://openalex.org/S2483380313","display_name":"ACM Transactions on Parallel Computing","issn_l":"2329-4949","issn":["2329-4949","2329-4957"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Parallel Computing","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W1948296184","https://openalex.org/W2043990347","https://openalex.org/W2045992017","https://openalex.org/W2047610878","https://openalex.org/W2104380208","https://openalex.org/W2160535745","https://openalex.org/W2461005004","https://openalex.org/W2731702594","https://openalex.org/W3015517561","https://openalex.org/W3107663002","https://openalex.org/W3109089832","https://openalex.org/W3159846083","https://openalex.org/W3196521375","https://openalex.org/W3215381526","https://openalex.org/W4237753580","https://openalex.org/W4318617421","https://openalex.org/W4319069056","https://openalex.org/W4362675325"],"related_works":["https://openalex.org/W2157863322","https://openalex.org/W2360550119","https://openalex.org/W2001381587","https://openalex.org/W2057923237","https://openalex.org/W4212973497","https://openalex.org/W1980786482","https://openalex.org/W1606194289","https://openalex.org/W4289860553","https://openalex.org/W3477132","https://openalex.org/W1792937979"],"abstract_inverted_index":{"In":[0],"this":[1],"article,":[2],"we":[3],"present":[4],"PfSolve\u2014a":[5],"new,":[6],"performant,":[7],"cross-platform,":[8],"and":[9,14,59,96,135,153],"open-source":[10],"implementation":[11,82],"of":[12,31,39,62,68,83,114,129],"tridiagonal":[13,120],"bidiagonal":[15],"matrix":[16],"solvers":[17],"for":[18,89,119,159],"the":[19,37,51,69,80,84,105,115,127,130],"GPU":[20,42,90],"architecture.":[21],"Released":[22],"as":[23,73],"a":[24,40,44,140],"stand-alone":[25],"library,":[26],"PfSolve":[27,76,138,160],"can":[28],"solve":[29],"systems":[30],"arbitrary":[32],"size":[33,72],"that":[34,137],"fit":[35],"into":[36],"memory":[38],"single":[41],"with":[43,122],"potential":[45],"extension":[46],"to":[47,132],"multi-GPU":[48],"support":[49],"in":[50,56,102,104],"future.":[52],"The":[53,156],"code":[54,158],"works":[55],"single,":[57],"double,":[58],"double-double":[60],"emulation":[61],"quad":[63],"precision":[64],"using":[65,93],"only":[66],"0.1%":[67],"original":[70],"system":[71],"additional":[74],"memory.":[75],"is":[77,161],"based":[78],"on":[79,146,163],"in-house":[81],"Parallel":[85,116],"Thomas":[86,117],"algorithm":[87,118],"optimized":[88],"execution":[91],"by":[92],"warp-level":[94],"instructions":[95],"occupancy":[97],"optimizations,":[98],"which":[99],"are":[100],"discussed":[101],"detail":[103],"article.":[106],"This":[107],"work":[108],"also":[109],"presents":[110],"an":[111],"accuracy":[112],"analysis":[113],"matrices":[121],"various":[123],"dominance":[124],"factors":[125],"(approximately,":[126],"ratio":[128],"off-diagonal":[131],"diagonal":[133],"terms)":[134],"demonstrates":[136],"achieves":[139],"considerable":[141],"speedup":[142],"over":[143],"vendor":[144],"solutions":[145],"modern":[147],"HPC":[148],"GPUs":[149],"like":[150],"Nvidia":[151],"H100":[152],"AMD":[154],"MI210.":[155],"source":[157],"available":[162],"GitHub.":[164]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
