{"id":"https://openalex.org/W4415250930","doi":"https://doi.org/10.1109/hpec67600.2025.11196269","title":"GPU-Accelerated, Mixed Precision GMRES(m) with Varied Restarts","display_name":"GPU-Accelerated, Mixed Precision GMRES(m) with Varied Restarts","publication_year":2025,"publication_date":"2025-09-15","ids":{"openalex":"https://openalex.org/W4415250930","doi":"https://doi.org/10.1109/hpec67600.2025.11196269"},"language":"en","primary_location":{"id":"doi:10.1109/hpec67600.2025.11196269","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec67600.2025.11196269","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020045263","display_name":"A.N.M.M. Haque","orcid":"https://orcid.org/0000-0003-1018-1354"},"institutions":[{"id":"https://openalex.org/I146416000","display_name":"University of Kansas","ror":"https://ror.org/001tmjg57","country_code":"US","type":"education","lineage":["https://openalex.org/I146416000"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Abir Haque","raw_affiliation_strings":["University of Kansas,Institute for Information Sciences,Department of Electrical Engineering and Computer Science"],"affiliations":[{"raw_affiliation_string":"University of Kansas,Institute for Information Sciences,Department of Electrical Engineering and Computer Science","institution_ids":["https://openalex.org/I146416000"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082723056","display_name":"Suzanne M. Shontz","orcid":"https://orcid.org/0000-0002-4874-0812"},"institutions":[{"id":"https://openalex.org/I146416000","display_name":"University of Kansas","ror":"https://ror.org/001tmjg57","country_code":"US","type":"education","lineage":["https://openalex.org/I146416000"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Suzanne M. Shontz","raw_affiliation_strings":["The University of Kansas,Department of Electrical Engineering and Computer Science,Lawrence,USA"],"affiliations":[{"raw_affiliation_string":"The University of Kansas,Department of Electrical Engineering and Computer Science,Lawrence,USA","institution_ids":["https://openalex.org/I146416000"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009472962","display_name":"Xuemin Tu","orcid":null},"institutions":[{"id":"https://openalex.org/I146416000","display_name":"University of Kansas","ror":"https://ror.org/001tmjg57","country_code":"US","type":"education","lineage":["https://openalex.org/I146416000"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xuemin Tu","raw_affiliation_strings":["The University of Kansas,Department of Mathematics,Lawrence,USA"],"affiliations":[{"raw_affiliation_string":"The University of Kansas,Department of Mathematics,Lawrence,USA","institution_ids":["https://openalex.org/I146416000"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5020045263"],"corresponding_institution_ids":["https://openalex.org/I146416000"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.28120779,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10052","display_name":"Medical Image Segmentation Techniques","score":0.9775999784469604,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10052","display_name":"Medical Image Segmentation Techniques","score":0.9775999784469604,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12983","display_name":"Satellite Image Processing and Photogrammetry","score":0.9588000178337097,"subfield":{"id":"https://openalex.org/subfields/2212","display_name":"Ocean Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.9350000023841858,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/residual","display_name":"Residual","score":0.6807000041007996},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.6741999983787537},{"id":"https://openalex.org/keywords/invertible-matrix","display_name":"Invertible matrix","score":0.5386000275611877},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.4002000093460083},{"id":"https://openalex.org/keywords/work","display_name":"Work (physics)","score":0.3856000006198883},{"id":"https://openalex.org/keywords/iterative-method","display_name":"Iterative method","score":0.3675999939441681},{"id":"https://openalex.org/keywords/rate-of-convergence","display_name":"Rate of convergence","score":0.3668999969959259}],"concepts":[{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.6807000041007996},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.6741999983787537},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6176999807357788},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.597000002861023},{"id":"https://openalex.org/C96442724","wikidata":"https://www.wikidata.org/wiki/Q242188","display_name":"Invertible matrix","level":2,"score":0.5386000275611877},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4255000054836273},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.4002000093460083},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.3856000006198883},{"id":"https://openalex.org/C159694833","wikidata":"https://www.wikidata.org/wiki/Q2321565","display_name":"Iterative method","level":2,"score":0.3675999939441681},{"id":"https://openalex.org/C57869625","wikidata":"https://www.wikidata.org/wiki/Q1783502","display_name":"Rate of convergence","level":3,"score":0.3668999969959259},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.3659999966621399},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.36160001158714294},{"id":"https://openalex.org/C2779982483","wikidata":"https://www.wikidata.org/wiki/Q6094420","display_name":"Iterative refinement","level":2,"score":0.3330000042915344},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.33079999685287476},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.32710000872612},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.32420000433921814},{"id":"https://openalex.org/C88626702","wikidata":"https://www.wikidata.org/wiki/Q1128903","display_name":"Continuation","level":2,"score":0.2833999991416931},{"id":"https://openalex.org/C2987595161","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Optimization algorithm","level":2,"score":0.2671000063419342},{"id":"https://openalex.org/C41045048","wikidata":"https://www.wikidata.org/wiki/Q202843","display_name":"Linear programming","level":2,"score":0.251800000667572}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec67600.2025.11196269","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec67600.2025.11196269","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320310131","display_name":"University of Kansas","ror":"https://ror.org/001tmjg57"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W1506342804","https://openalex.org/W1575701986","https://openalex.org/W2035080386","https://openalex.org/W2128573164","https://openalex.org/W2140153041","https://openalex.org/W2921480401","https://openalex.org/W2963078637","https://openalex.org/W3099348172","https://openalex.org/W3138530731","https://openalex.org/W3176827258","https://openalex.org/W4281657881","https://openalex.org/W4289912265","https://openalex.org/W4318603194"],"related_works":[],"abstract_inverted_index":{"GMRES(m)":[0,25,83,200],"is":[1,21,93],"a":[2,19,79,133,159],"popular":[3],"iterative":[4],"method":[5,62,92,110],"for":[6,81,118],"solving":[7],"linear":[8],"systems":[9],"with":[10,190],"large,":[11],"sparse,":[12],"(possibly)":[13],"nonsymmetric,":[14],"and":[15,122,153,186,201,224],"invertible":[16],"matrices.":[17],"If":[18],"matrix":[20],"not":[22,211],"positive":[23],"definite,":[24],"will":[26],"exhibit":[27],"slow":[28],"or":[29],"stagnating":[30],"convergence":[31,66],"behavior.":[32],"Baker":[33,150],"et":[34,75,151,155],"al.":[35,76,152,156],"have":[36,77,227],"proposed":[37,78,148],"\u03b1GMRES(m<inf":[38,163,192,203],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[39,41,164,167,193,195,204,206],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">max</inf>,m<inf":[40,194,205],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">min</inf>),":[42,207],"which":[43],"modifies":[44],"the":[45,69,96,113,123,219],"restart":[46],"parameter":[47],"m":[48],"based":[49],"on":[50,176,228],"angles":[51],"between":[52],"consecutive":[53],"residual":[54],"vectors":[55],"across":[56],"two":[57],"cycles":[58],"of":[59,71,90,116,125,136,162,213],"GMRES(m).":[60,73,181],"This":[61],"results":[63,131],"in":[64,99,132],"faster":[65],"compared":[67,178],"to":[68,179],"baseline":[70,124],"full-precision":[72,126,180,202],"Lindquist":[74,154],"strategy":[80],"accelerating":[82],"via":[84],"mixed":[85,120],"precision":[86,121],"arithmetic.":[87],"The":[88],"development":[89],"this":[91],"motivated":[94],"by":[95,140,149],"recent":[97],"trend":[98],"newer":[100,141],"GPUs":[101],"offering":[102],"better":[103,174,188],"low-precision":[104,137],"performance":[105,175,189],"than":[106],"high-precision.":[107],"Although":[108],"their":[109,128],"takes":[111],"around":[112],"same":[114],"number":[115],"iterations":[117],"both":[119,198],"GMRES(m),":[127],"GPU":[129,170],"implementation":[130,161,171],"greater":[134],"amount":[135],"parallelism":[138],"offered":[139],"GPUs.":[142],"Our":[143,169],"work":[144,216],"combines":[145],"optimization":[146],"strategies":[147],"We":[157,182,209],"present":[158],"mixed-precision":[160,191,199,222],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">max</inf>,":[165],"m<inf":[166],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">min</inf>).":[168],"exhibits":[172],"47.16%":[173],"average":[177],"also":[183],"show":[184],"38.89%":[185],"7.8%":[187],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">min</inf>)":[196],"over":[197],"respectively.":[208],"are":[210],"aware":[212],"any":[214],"other":[215],"that":[217,221],"explores":[218],"effects":[220],"arithmetic":[223],"varied":[225],"restarts":[226],"each":[229],"other.":[230]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-16T00:00:00"}
