{"id":"https://openalex.org/W4408988071","doi":"https://doi.org/10.1007/s11227-025-07145-6","title":"tfQMRgpu: a GPU-accelerated linear solver with block-sparse complex result matrix","display_name":"tfQMRgpu: a GPU-accelerated linear solver with block-sparse complex result matrix","publication_year":2025,"publication_date":"2025-03-27","ids":{"openalex":"https://openalex.org/W4408988071","doi":"https://doi.org/10.1007/s11227-025-07145-6"},"language":"en","primary_location":{"id":"doi:10.1007/s11227-025-07145-6","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s11227-025-07145-6","pdf_url":"https://link.springer.com/content/pdf/10.1007/s11227-025-07145-6.pdf","source":{"id":"https://openalex.org/S32326811","display_name":"The Journal of Supercomputing","issn_l":"0920-8542","issn":["0920-8542","1573-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The Journal of Supercomputing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s11227-025-07145-6.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047825572","display_name":"Paul F. Baumeister","orcid":"https://orcid.org/0000-0002-2005-4474"},"institutions":[{"id":"https://openalex.org/I171892758","display_name":"Forschungszentrum J\u00fclich","ror":"https://ror.org/02nv7yv05","country_code":"DE","type":"facility","lineage":["https://openalex.org/I1305996414","https://openalex.org/I171892758"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Paul F. Baumeister","raw_affiliation_strings":["J\u00fclich Supercomputing Centre, Forschungszentrum J\u00fclich, 52425, J\u00fclich, Germany"],"affiliations":[{"raw_affiliation_string":"J\u00fclich Supercomputing Centre, Forschungszentrum J\u00fclich, 52425, J\u00fclich, Germany","institution_ids":["https://openalex.org/I171892758"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032158508","display_name":"Stepan Nassyr","orcid":"https://orcid.org/0000-0002-0035-244X"},"institutions":[{"id":"https://openalex.org/I171892758","display_name":"Forschungszentrum J\u00fclich","ror":"https://ror.org/02nv7yv05","country_code":"DE","type":"facility","lineage":["https://openalex.org/I1305996414","https://openalex.org/I171892758"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Stepan Nassyr","raw_affiliation_strings":["J\u00fclich Supercomputing Centre, Forschungszentrum J\u00fclich, 52425, J\u00fclich, Germany"],"affiliations":[{"raw_affiliation_string":"J\u00fclich Supercomputing Centre, Forschungszentrum J\u00fclich, 52425, J\u00fclich, Germany","institution_ids":["https://openalex.org/I171892758"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5047825572"],"corresponding_institution_ids":["https://openalex.org/I171892758"],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":{"value":2390,"currency":"EUR","value_usd":2990},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.08198647,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"81","issue":"5","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10792","display_name":"Matrix Theory and Algorithms","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10792","display_name":"Matrix Theory and Algorithms","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10739","display_name":"Electromagnetic Scattering and Analysis","score":0.9929999709129333,"subfield":{"id":"https://openalex.org/subfields/3107","display_name":"Atomic and Molecular Physics, and Optics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8706740736961365},{"id":"https://openalex.org/keywords/solver","display_name":"Solver","score":0.7363298535346985},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.6380947232246399},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.6354608535766602},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6120660305023193},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.5430469512939453},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.5188092589378357},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.5133813619613647},{"id":"https://openalex.org/keywords/matrix","display_name":"Matrix (chemical analysis)","score":0.48343002796173096},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3326374292373657},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.296613872051239},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.0928628146648407},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.0791645348072052},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.06984397768974304},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.06209135055541992}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8706740736961365},{"id":"https://openalex.org/C2778770139","wikidata":"https://www.wikidata.org/wiki/Q1966904","display_name":"Solver","level":2,"score":0.7363298535346985},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.6380947232246399},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.6354608535766602},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6120660305023193},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.5430469512939453},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.5188092589378357},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.5133813619613647},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.48343002796173096},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3326374292373657},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.296613872051239},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0928628146648407},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0791645348072052},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.06984397768974304},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.06209135055541992},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C147597530","wikidata":"https://www.wikidata.org/wiki/Q369472","display_name":"Computational chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s11227-025-07145-6","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s11227-025-07145-6","pdf_url":"https://link.springer.com/content/pdf/10.1007/s11227-025-07145-6.pdf","source":{"id":"https://openalex.org/S32326811","display_name":"The Journal of Supercomputing","issn_l":"0920-8542","issn":["0920-8542","1573-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The Journal of Supercomputing","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s11227-025-07145-6","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s11227-025-07145-6","pdf_url":"https://link.springer.com/content/pdf/10.1007/s11227-025-07145-6.pdf","source":{"id":"https://openalex.org/S32326811","display_name":"The Journal of Supercomputing","issn_l":"0920-8542","issn":["0920-8542","1573-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The Journal of Supercomputing","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1564784682","display_name":null,"funder_award_id":"SiVeGCS","funder_id":"https://openalex.org/F4320321114","funder_display_name":"Bundesministerium f\u00fcr Bildung und Forschung"}],"funders":[{"id":"https://openalex.org/F4320321114","display_name":"Bundesministerium f\u00fcr Bildung und Forschung","ror":"https://ror.org/04pz7b180"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4408988071.pdf"},"referenced_works_count":26,"referenced_works":["https://openalex.org/W1971821930","https://openalex.org/W1983598244","https://openalex.org/W2002033607","https://openalex.org/W2022445566","https://openalex.org/W2029116634","https://openalex.org/W2030976617","https://openalex.org/W2040298674","https://openalex.org/W2045555463","https://openalex.org/W2067153720","https://openalex.org/W2069538140","https://openalex.org/W2080353445","https://openalex.org/W2085259167","https://openalex.org/W2090838661","https://openalex.org/W2230728100","https://openalex.org/W2331333879","https://openalex.org/W2332437841","https://openalex.org/W2618077759","https://openalex.org/W2966477907","https://openalex.org/W3005610174","https://openalex.org/W3038558128","https://openalex.org/W3105598893","https://openalex.org/W3125167458","https://openalex.org/W4206097460","https://openalex.org/W4229666556","https://openalex.org/W4322576898","https://openalex.org/W6912883650"],"related_works":["https://openalex.org/W1963859303","https://openalex.org/W2364044215","https://openalex.org/W2389600408","https://openalex.org/W240129890","https://openalex.org/W3048701459","https://openalex.org/W2149078538","https://openalex.org/W2370314112","https://openalex.org/W1912958759","https://openalex.org/W2792081825","https://openalex.org/W2893308117"],"abstract_inverted_index":{"Abstract":[0],"We":[1],"present":[2],",":[3],"a":[4,73,89,149],"GPU-accelerated":[5],"iterative":[6],"linear":[7],"solver":[8,43],"based":[9],"on":[10,83,114],"the":[11,25,58,119,160],"transpose-free":[12],"quasi-minimal":[13],"residual":[14],"(tfQMR)":[15],"method.":[16],"Designed":[17],"for":[18,107,132,140,152],"large-scale":[19],"electronic":[20],"structure":[21],"calculations,":[22],"particularly":[23],"in":[24,122],"context":[26],"of":[27,60,128,162],"Korringa\u2013Kohn\u2013Rostoker":[28],"density":[29],"functional":[30],"theory,":[31],"efficiently":[32],"handles":[33],"block-sparse":[34,133],"complex":[35],"matrices":[36],"arising":[37],"from":[38,159],"multiple":[39,61],"scattering":[40],"theory.":[41],"The":[42],"exploits":[44],"GPU":[45],"parallelism":[46],"to":[47,72],"accelerate":[48],"convergence":[49],"while":[50],"leveraging":[51],"memory-efficient":[52],"sparse":[53],"storage":[54],"formats.":[55],"By":[56],"unifying":[57],"solution":[59],"right-hand":[62],"side":[63],"(RHS)":[64],"block":[65],"vectors,":[66],"significantly":[67],"improves":[68],"throughput,":[69],"demonstrating":[70],"up":[71],"$$3.5\\times$$":[74],"<mml:math":[75],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\">":[76],"<mml:mrow>":[77],"<mml:mn>3.5</mml:mn>":[78],"<mml:mo>\u00d7</mml:mo>":[79],"</mml:mrow>":[80],"</mml:math>":[81],"speedup":[82],"modern":[84],"GPUs.":[85],"Additionally,":[86],"we":[87],"introduce":[88],"flexible":[90],"implementation":[91],"framework":[92],"that":[93,156],"supports":[94],"both":[95],"explicit":[96],"matrix-based":[97],"and":[98,145],"matrix-free":[99],"operator":[100],"formulations,":[101],"such":[102],"as":[103],"high-order":[104],"finite-difference":[105],"stencils":[106],"real-space":[108],"grid-based":[109],"Green":[110],"function":[111],"calculations.":[112],"Benchmarks":[113],"various":[115],"NVIDIA":[116],"GPUs":[117],"demonstrate":[118],"solver\u2019s":[120],"efficiency,":[121],"some":[123],"cases":[124],"achieving":[125],"over":[126],"56%":[127],"peak":[129],"floating-point":[130],"performance":[131],"matrix":[134],"multiplications.":[135],"is":[136],"open-source,":[137],"providing":[138],"interfaces":[139],"C,":[141],"C++,":[142],"Fortran,":[143],"Julia,":[144],"Python,":[146],"making":[147],"it":[148],"versatile":[150],"tool":[151],"high-performance":[153],"computing":[154],"applications":[155],"can":[157],"benefit":[158],"unification":[161],"RHS":[163],"problems.":[164]},"counts_by_year":[],"updated_date":"2025-12-19T19:40:27.379048","created_date":"2025-10-10T00:00:00"}
