{"id":"https://openalex.org/W4416203912","doi":"https://doi.org/10.1145/3712285.3759807","title":"A Nested Krylov Method Using Half-Precision Arithmetic","display_name":"A Nested Krylov Method Using Half-Precision Arithmetic","publication_year":2025,"publication_date":"2025-11-12","ids":{"openalex":"https://openalex.org/W4416203912","doi":"https://doi.org/10.1145/3712285.3759807"},"language":null,"primary_location":{"id":"doi:10.1145/3712285.3759807","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3712285.3759807","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3712285.3759807","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101972478","display_name":"Kengo Suzuki","orcid":"https://orcid.org/0009-0002-8328-0388"},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Kengo Suzuki","raw_affiliation_strings":["Kyoto University, Kyoto, Japan"],"affiliations":[{"raw_affiliation_string":"Kyoto University, Kyoto, Japan","institution_ids":["https://openalex.org/I22299242"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5090608549","display_name":"Takeshi Iwashita","orcid":"https://orcid.org/0000-0003-1938-1723"},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Takeshi Iwashita","raw_affiliation_strings":["Kyoto University, Kyoto, Japan"],"affiliations":[{"raw_affiliation_string":"Kyoto University, Kyoto, Japan","institution_ids":["https://openalex.org/I22299242"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5101972478"],"corresponding_institution_ids":["https://openalex.org/I22299242"],"apc_list":null,"apc_paid":null,"fwci":1.42,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.87639818,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"711","last_page":"727"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10792","display_name":"Matrix Theory and Algorithms","score":0.6468999981880188,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10792","display_name":"Matrix Theory and Algorithms","score":0.6468999981880188,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11697","display_name":"Numerical Methods and Algorithms","score":0.2354000061750412,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11435","display_name":"Polynomial and algebraic computation","score":0.03200000151991844,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/solver","display_name":"Solver","score":0.7117999792098999},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5713000297546387},{"id":"https://openalex.org/keywords/iterative-method","display_name":"Iterative method","score":0.43689998984336853},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.4339999854564667},{"id":"https://openalex.org/keywords/linear-system","display_name":"Linear system","score":0.42669999599456787},{"id":"https://openalex.org/keywords/iterative-refinement","display_name":"Iterative refinement","score":0.3937999904155731},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.3774999976158142}],"concepts":[{"id":"https://openalex.org/C2778770139","wikidata":"https://www.wikidata.org/wiki/Q1966904","display_name":"Solver","level":2,"score":0.7117999792098999},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6934000253677368},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5713000297546387},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5307000279426575},{"id":"https://openalex.org/C159694833","wikidata":"https://www.wikidata.org/wiki/Q2321565","display_name":"Iterative method","level":2,"score":0.43689998984336853},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.4339999854564667},{"id":"https://openalex.org/C6802819","wikidata":"https://www.wikidata.org/wiki/Q1072174","display_name":"Linear system","level":2,"score":0.42669999599456787},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.42579999566078186},{"id":"https://openalex.org/C2779982483","wikidata":"https://www.wikidata.org/wiki/Q6094420","display_name":"Iterative refinement","level":2,"score":0.3937999904155731},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.3774999976158142},{"id":"https://openalex.org/C83581934","wikidata":"https://www.wikidata.org/wiki/Q527381","display_name":"Arbitrary-precision arithmetic","level":2,"score":0.3711000084877014},{"id":"https://openalex.org/C147060835","wikidata":"https://www.wikidata.org/wiki/Q1757151","display_name":"Krylov subspace","level":3,"score":0.36640000343322754},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.3513000011444092},{"id":"https://openalex.org/C1306188","wikidata":"https://www.wikidata.org/wiki/Q4060687","display_name":"Nested loop join","level":2,"score":0.35109999775886536},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.32179999351501465},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.28929999470710754},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.287200003862381},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.2809000015258789},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.27079999446868896},{"id":"https://openalex.org/C35912277","wikidata":"https://www.wikidata.org/wiki/Q1243369","display_name":"Double-precision floating-point format","level":3,"score":0.25429999828338623}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3712285.3759807","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3712285.3759807","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3712285.3759807","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3712285.3759807","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6800296275","display_name":null,"funder_award_id":"JP23H00462, JP24KJ0266, JP25K24388","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"}],"funders":[{"id":"https://openalex.org/F4320334764","display_name":"Japan Society for the Promotion of Science","ror":"https://ror.org/00hhkn466"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W1588915715","https://openalex.org/W1977146902","https://openalex.org/W1980266375","https://openalex.org/W1984840642","https://openalex.org/W2032576830","https://openalex.org/W2035080386","https://openalex.org/W2041876368","https://openalex.org/W2053564565","https://openalex.org/W2089099677","https://openalex.org/W2100060323","https://openalex.org/W2100530229","https://openalex.org/W2111593426","https://openalex.org/W2335170200","https://openalex.org/W2599973081","https://openalex.org/W2895305554","https://openalex.org/W2997929922","https://openalex.org/W3035977869","https://openalex.org/W3138530731","https://openalex.org/W3161018377","https://openalex.org/W3176827258","https://openalex.org/W3177284785","https://openalex.org/W3216608520","https://openalex.org/W4281657881","https://openalex.org/W4289912265","https://openalex.org/W4291517473","https://openalex.org/W4296544616","https://openalex.org/W4312640224","https://openalex.org/W4318603194","https://openalex.org/W4379985703","https://openalex.org/W4389720324","https://openalex.org/W4392715990","https://openalex.org/W4400409874","https://openalex.org/W4401408715","https://openalex.org/W4404485591","https://openalex.org/W4406171830"],"related_works":[],"abstract_inverted_index":{"Low-precision":[0],"computing":[1,10],"is":[2],"essential":[3],"for":[4,19],"efficiently":[5],"utilizing":[6],"memory":[7],"bandwidth":[8],"and":[9,43,111,116,132,142],"cores.":[11],"While":[12],"many":[13],"mixed-precision":[14,118,127],"algorithms":[15],"have":[16],"been":[17],"developed":[18],"iterative":[20],"sparse":[21],"linear":[22],"solvers,":[23,129],"effectively":[24],"leveraging":[25],"half-precision":[26],"(fp16)":[27],"arithmetic":[28],"remains":[29],"challenging.":[30],"This":[31],"study":[32],"introduces":[33],"a":[34,47,75],"novel":[35],"nested":[36,49,82],"Krylov":[37,128],"approach":[38,96],"that":[39,91],"integrates":[40],"the":[41,59,69,81,95,122],"FGMRES":[42],"Richardson":[44],"methods":[45],"in":[46],"deeply":[48],"structure,":[50],"progressively":[51],"reducing":[52],"precision":[53,67],"from":[54],"double-precision":[55,115],"to":[56,108,139],"fp16":[57,93],"toward":[58],"innermost":[60],"solver.":[61],"To":[62],"avoid":[63],"meaningless":[64],"computations":[65],"beyond":[66],"limits,":[68],"low-precision":[70],"inner":[71],"solvers":[72],"perform":[73],"only":[74],"few":[76],"iterations":[77],"per":[78],"invocation,":[79],"while":[80],"structure":[83],"ensures":[84],"their":[85],"frequent":[86],"execution.":[87],"Numerical":[88],"experiments":[89],"show":[90],"incorporating":[92],"into":[94],"directly":[97],"enhances":[98],"solver":[99],"performance":[100],"without":[101],"compromising":[102],"convergence,":[103],"achieving":[104],"speedups":[105],"of":[106,137],"up":[107,138],"1.65":[109],"\u00d7":[110,113],"2.42":[112],"over":[114],"double-single":[117],"implementations,":[119],"respectively.":[120,144],"Furthermore,":[121],"proposed":[123],"method":[124],"outperforms":[125],"conventional":[126],"CG,":[130],"BiCGStab,":[131],"restarted":[133],"FGMRES,":[134],"by":[135],"factors":[136],"2.47,":[140],"2.74,":[141],"69.10,":[143]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-11-12T00:00:00"}
