{"id":"https://openalex.org/W4392449925","doi":"https://doi.org/10.1109/tpds.2024.3372473","title":"Optimizing Multi-Grid Preconditioned Conjugate Gradient Method on Multi-Cores","display_name":"Optimizing Multi-Grid Preconditioned Conjugate Gradient Method on Multi-Cores","publication_year":2024,"publication_date":"2024-03-05","ids":{"openalex":"https://openalex.org/W4392449925","doi":"https://doi.org/10.1109/tpds.2024.3372473"},"language":"en","primary_location":{"id":"doi:10.1109/tpds.2024.3372473","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2024.3372473","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5038684857","display_name":"Y. X. Fan","orcid":"https://orcid.org/0009-0003-9022-0727"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]},{"id":"https://openalex.org/I4610292","display_name":"Xiangtan University","ror":"https://ror.org/00xsfaz62","country_code":"CN","type":"education","lineage":["https://openalex.org/I4610292"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Fan Yuan","raw_affiliation_strings":["Department of Mathematics, National University of Defense Technology, Changsha, China","Department of Mathematics, Xiangtan University, Xiangtan, China"],"raw_orcid":"https://orcid.org/0009-0003-9022-0727","affiliations":[{"raw_affiliation_string":"Department of Mathematics, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"Department of Mathematics, Xiangtan University, Xiangtan, China","institution_ids":["https://openalex.org/I4610292"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101776727","display_name":"Xiaojian Yang","orcid":"https://orcid.org/0009-0007-9821-171X"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaojian Yang","raw_affiliation_strings":["College of Computer Science, National University of Defense Technology, Changsha, China"],"raw_orcid":"https://orcid.org/0009-0007-9821-171X","affiliations":[{"raw_affiliation_string":"College of Computer Science, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087730416","display_name":"Shengguo Li","orcid":"https://orcid.org/0000-0001-7827-6304"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shengguo Li","raw_affiliation_strings":["College of Computer Science, National University of Defense Technology, Changsha, China"],"raw_orcid":"https://orcid.org/0000-0001-7827-6304","affiliations":[{"raw_affiliation_string":"College of Computer Science, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006729432","display_name":"Dezun Dong","orcid":"https://orcid.org/0000-0001-6243-8479"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dezun Dong","raw_affiliation_strings":["College of Computer Science, National University of Defense Technology, Changsha, China"],"raw_orcid":"https://orcid.org/0000-0001-6243-8479","affiliations":[{"raw_affiliation_string":"College of Computer Science, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101626850","display_name":"Chun Huang","orcid":"https://orcid.org/0000-0002-0317-8192"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chun Huang","raw_affiliation_strings":["College of Computer Science, National University of Defense Technology, Changsha, China"],"raw_orcid":"https://orcid.org/0000-0002-0317-8192","affiliations":[{"raw_affiliation_string":"College of Computer Science, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5030293790","display_name":"Zheng Wang","orcid":"https://orcid.org/0009-0001-7858-6238"},"institutions":[{"id":"https://openalex.org/I37802460","display_name":"Northwest University","ror":"https://ror.org/00z3td547","country_code":"CN","type":"education","lineage":["https://openalex.org/I37802460"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zheng Wang","raw_affiliation_strings":["School of Information Sciences and Technology, Northwest University, Xi&#x2019;an, China"],"raw_orcid":"https://orcid.org/0009-0001-7858-6238","affiliations":[{"raw_affiliation_string":"School of Information Sciences and Technology, Northwest University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I37802460"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5038684857"],"corresponding_institution_ids":["https://openalex.org/I170215575","https://openalex.org/I4610292"],"apc_list":null,"apc_paid":null,"fwci":4.2692,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.952458,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"35","issue":"5","first_page":"768","last_page":"779"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10792","display_name":"Matrix Theory and Algorithms","score":0.9915000200271606,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8514155745506287},{"id":"https://openalex.org/keywords/multigrid-method","display_name":"Multigrid method","score":0.768936276435852},{"id":"https://openalex.org/keywords/conjugate-gradient-method","display_name":"Conjugate gradient method","score":0.7677034139633179},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7566215991973877},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6556521654129028},{"id":"https://openalex.org/keywords/xeon","display_name":"Xeon","score":0.6495668888092041},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.586563766002655},{"id":"https://openalex.org/keywords/asynchronous-communication","display_name":"Asynchronous communication","score":0.47426021099090576},{"id":"https://openalex.org/keywords/grid","display_name":"Grid","score":0.469421923160553},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.4321843981742859},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.43032270669937134},{"id":"https://openalex.org/keywords/distributed-memory","display_name":"Distributed memory","score":0.4285559058189392},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.3440793752670288},{"id":"https://openalex.org/keywords/shared-memory","display_name":"Shared memory","score":0.304157555103302},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.29041552543640137}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8514155745506287},{"id":"https://openalex.org/C137119250","wikidata":"https://www.wikidata.org/wiki/Q1413101","display_name":"Multigrid method","level":3,"score":0.768936276435852},{"id":"https://openalex.org/C81184566","wikidata":"https://www.wikidata.org/wiki/Q1191895","display_name":"Conjugate gradient method","level":2,"score":0.7677034139633179},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7566215991973877},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6556521654129028},{"id":"https://openalex.org/C145108525","wikidata":"https://www.wikidata.org/wiki/Q656154","display_name":"Xeon","level":2,"score":0.6495668888092041},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.586563766002655},{"id":"https://openalex.org/C151319957","wikidata":"https://www.wikidata.org/wiki/Q752739","display_name":"Asynchronous communication","level":2,"score":0.47426021099090576},{"id":"https://openalex.org/C187691185","wikidata":"https://www.wikidata.org/wiki/Q2020720","display_name":"Grid","level":2,"score":0.469421923160553},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.4321843981742859},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.43032270669937134},{"id":"https://openalex.org/C91481028","wikidata":"https://www.wikidata.org/wiki/Q1054686","display_name":"Distributed memory","level":3,"score":0.4285559058189392},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.3440793752670288},{"id":"https://openalex.org/C133875982","wikidata":"https://www.wikidata.org/wiki/Q764810","display_name":"Shared memory","level":2,"score":0.304157555103302},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.29041552543640137},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C93779851","wikidata":"https://www.wikidata.org/wiki/Q271977","display_name":"Partial differential equation","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tpds.2024.3372473","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2024.3372473","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4399999976158142,"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy"}],"awards":[{"id":"https://openalex.org/G229453057","display_name":null,"funder_award_id":"62073333","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3860502462","display_name":null,"funder_award_id":"62102444","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W105754624","https://openalex.org/W993511226","https://openalex.org/W1506342804","https://openalex.org/W1589783193","https://openalex.org/W1845641575","https://openalex.org/W1991173503","https://openalex.org/W1994104955","https://openalex.org/W1997542937","https://openalex.org/W2004951603","https://openalex.org/W2009196736","https://openalex.org/W2013029558","https://openalex.org/W2026024102","https://openalex.org/W2035080386","https://openalex.org/W2060803192","https://openalex.org/W2080090223","https://openalex.org/W2082146198","https://openalex.org/W2111784516","https://openalex.org/W2148497060","https://openalex.org/W2153190325","https://openalex.org/W2314577266","https://openalex.org/W2318393100","https://openalex.org/W2331164610","https://openalex.org/W2498157954","https://openalex.org/W2529902957","https://openalex.org/W2770393439","https://openalex.org/W2794424798","https://openalex.org/W2802556464","https://openalex.org/W2885585951","https://openalex.org/W2901426413","https://openalex.org/W3041159379","https://openalex.org/W3098281632","https://openalex.org/W3127904641","https://openalex.org/W3210190478","https://openalex.org/W3210601829","https://openalex.org/W4241696959","https://openalex.org/W4247482445","https://openalex.org/W4301491118","https://openalex.org/W4312280891","https://openalex.org/W4381328331","https://openalex.org/W6633647612","https://openalex.org/W6728443131","https://openalex.org/W6751040661"],"related_works":["https://openalex.org/W2155025153","https://openalex.org/W4253880741","https://openalex.org/W2361205607","https://openalex.org/W2026512611","https://openalex.org/W4235544854","https://openalex.org/W17377283","https://openalex.org/W1531300359","https://openalex.org/W1985165680","https://openalex.org/W4245497162","https://openalex.org/W2353146130"],"abstract_inverted_index":{"Multigrid":[0],"preconditioned":[1],"conjugate":[2,93],"gradient":[3,94],"(MGPCG)":[4],"is":[5,15],"commonly":[6],"used":[7],"in":[8],"high-performance":[9,92],"computing":[10],"(HPC)":[11],"workloads.":[12],"However,":[13],"MGPCG":[14,55],"notoriously":[16],"challenging":[17],"to":[18,44,72,90,127],"optimize":[19],"since":[20],"most":[21],"of":[22,54,61,84],"its":[23],"computation":[24],"kernels":[25,60],"are":[26],"memory-bounded":[27],"with":[28,100],"low":[29],"arithmetic":[30],"intensity":[31],"and":[32,49,97,115,134,141],"non-trivial":[33],"communication":[34,52,70],"patterns":[35],"among":[36],"parallel":[37,78],"processes.":[38,79],"This":[39],"article":[40],"presents":[41],"new":[42],"techniques":[43],"improve":[45],"the":[46,51,59,74,82,91,108,132],"data":[47,75],"locality":[48],"reduce":[50,73],"overhead":[53],"by":[56,87],"first":[57],"merging":[58],"multigrid":[62,104],"(MG).":[63],"We":[64,80,106],"then":[65],"develop":[66],"an":[67],"asynchronous":[68],"neighboring":[69],"algorithm":[71],"communications":[76],"across":[77,138],"demonstrated":[81],"benefits":[83],"our":[85,124],"approach":[86,125],"applying":[88],"it":[89,99],"(HPCG)":[95],"benchmark":[96],"integrating":[98],"a":[101,128],"real-life":[102],"algebraic":[103],"package.":[105],"test":[107],"resulting":[109],"software":[110],"implementations":[111,137],"on":[112],"three":[113],"ARMv8":[114],"one":[116],"Intel":[117],"Xeon":[118],"system.":[119],"Experimental":[120],"results":[121],"show":[122],"that":[123],"leads":[126],"1.62x-2.54x":[129],"speedup":[130],"over":[131],"engineer-":[133],"vendor-tuned":[135],"HPCG":[136],"various":[139],"workloads":[140],"platforms.":[142]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":2}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
