{"id":"https://openalex.org/W2331164610","doi":"https://doi.org/10.1177/1094342015593157","title":"Optimizations in a high-performance conjugate gradient benchmark for IA-based multi- and many-core processors","display_name":"Optimizations in a high-performance conjugate gradient benchmark for IA-based multi- and many-core processors","publication_year":2015,"publication_date":"2015-06-29","ids":{"openalex":"https://openalex.org/W2331164610","doi":"https://doi.org/10.1177/1094342015593157","mag":"2331164610"},"language":"en","primary_location":{"id":"doi:10.1177/1094342015593157","is_oa":false,"landing_page_url":"https://doi.org/10.1177/1094342015593157","pdf_url":null,"source":{"id":"https://openalex.org/S60606485","display_name":"The International Journal of High Performance Computing Applications","issn_l":"1094-3420","issn":["1094-3420","1741-2846"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The International Journal of High Performance Computing Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101876582","display_name":"Jongsoo Park","orcid":"https://orcid.org/0000-0002-4750-9440"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jongsoo Park","raw_affiliation_strings":["Parallel Computing Laboratory, Intel Corporation, USA"],"affiliations":[{"raw_affiliation_string":"Parallel Computing Laboratory, Intel Corporation, USA","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007892541","display_name":"Mikhail Smelyanskiy","orcid":"https://orcid.org/0000-0002-2433-6110"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mikhail Smelyanskiy","raw_affiliation_strings":["Parallel Computing Laboratory, Intel Corporation, USA"],"affiliations":[{"raw_affiliation_string":"Parallel Computing Laboratory, Intel Corporation, USA","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110182945","display_name":"Karthikeyan Vaidyanathan","orcid":null},"institutions":[{"id":"https://openalex.org/I4210146682","display_name":"Intel (India)","ror":"https://ror.org/04f2n1245","country_code":"IN","type":"company","lineage":["https://openalex.org/I1343180700","https://openalex.org/I4210146682"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Karthikeyan Vaidyanathan","raw_affiliation_strings":["Parallel Computing Laboratory, Intel Corporation, India"],"affiliations":[{"raw_affiliation_string":"Parallel Computing Laboratory, Intel Corporation, India","institution_ids":["https://openalex.org/I4210146682"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024066643","display_name":"Alexander Heinecke","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alexander Heinecke","raw_affiliation_strings":["Parallel Computing Laboratory, Intel Corporation, USA"],"affiliations":[{"raw_affiliation_string":"Parallel Computing Laboratory, Intel Corporation, USA","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018944355","display_name":"Dhiraj Kalamkar","orcid":null},"institutions":[{"id":"https://openalex.org/I4210146682","display_name":"Intel (India)","ror":"https://ror.org/04f2n1245","country_code":"IN","type":"company","lineage":["https://openalex.org/I1343180700","https://openalex.org/I4210146682"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Dhiraj D Kalamkar","raw_affiliation_strings":["Parallel Computing Laboratory, Intel Corporation, India"],"affiliations":[{"raw_affiliation_string":"Parallel Computing Laboratory, Intel Corporation, India","institution_ids":["https://openalex.org/I4210146682"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067944881","display_name":"Md. Mosotofa Ali Patwary","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Md Mosotofa Ali Patwary","raw_affiliation_strings":["Parallel Computing Laboratory, Intel Corporation, USA"],"affiliations":[{"raw_affiliation_string":"Parallel Computing Laboratory, Intel Corporation, USA","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068139008","display_name":"Vadim Pirogov","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vadim Pirogov","raw_affiliation_strings":["Software and Service Group, Intel Corporation, Russia"],"affiliations":[{"raw_affiliation_string":"Software and Service Group, Intel Corporation, Russia","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032238070","display_name":"Pradeep Dubey","orcid":"https://orcid.org/0000-0001-5853-0619"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pradeep Dubey","raw_affiliation_strings":["Parallel Computing Laboratory, Intel Corporation, USA"],"affiliations":[{"raw_affiliation_string":"Parallel Computing Laboratory, Intel Corporation, USA","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100381480","display_name":"Xing Liu","orcid":"https://orcid.org/0000-0003-0824-5830"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xing Liu","raw_affiliation_strings":["T. J. Watson Research Center, IBM Research, USA"],"affiliations":[{"raw_affiliation_string":"T. J. Watson Research Center, IBM Research, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112217286","display_name":"Carlos Rosales","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Carlos Rosales","raw_affiliation_strings":["Texas Advanced Computing Center, University of Texas at Austin, USA"],"affiliations":[{"raw_affiliation_string":"Texas Advanced Computing Center, University of Texas at Austin, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017610131","display_name":"Cyril Mazauric","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cyril Mazauric","raw_affiliation_strings":["Application and Performance Team, Bull, France"],"affiliations":[{"raw_affiliation_string":"Application and Performance Team, Bull, France","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039454582","display_name":"Christopher Daley","orcid":"https://orcid.org/0000-0003-3105-0804"},"institutions":[{"id":"https://openalex.org/I4210151627","display_name":"National Energy Research Scientific Computing Center","ror":"https://ror.org/05v3mvq14","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521","https://openalex.org/I4210151627"]},{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christopher Daley","raw_affiliation_strings":["National Energy Research Scientific Computing Center, Lawrence Berkeley National Laboratory, USA"],"affiliations":[{"raw_affiliation_string":"National Energy Research Scientific Computing Center, Lawrence Berkeley National Laboratory, USA","institution_ids":["https://openalex.org/I4210151627","https://openalex.org/I148283060"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5101876582"],"corresponding_institution_ids":["https://openalex.org/I1343180700"],"apc_list":null,"apc_paid":null,"fwci":1.2919,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.81625665,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"30","issue":"1","first_page":"11","last_page":"27"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10792","display_name":"Matrix Theory and Algorithms","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.8484696745872498},{"id":"https://openalex.org/keywords/xeon-phi","display_name":"Xeon Phi","score":0.8146750926971436},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7913156747817993},{"id":"https://openalex.org/keywords/coprocessor","display_name":"Coprocessor","score":0.5234788656234741},{"id":"https://openalex.org/keywords/xeon","display_name":"Xeon","score":0.48453134298324585},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4523972272872925},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.45163848996162415},{"id":"https://openalex.org/keywords/conjugate-gradient-method","display_name":"Conjugate gradient method","score":0.43129363656044006},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2904106378555298}],"concepts":[{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.8484696745872498},{"id":"https://openalex.org/C96972482","wikidata":"https://www.wikidata.org/wiki/Q1049168","display_name":"Xeon Phi","level":2,"score":0.8146750926971436},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7913156747817993},{"id":"https://openalex.org/C86111242","wikidata":"https://www.wikidata.org/wiki/Q859595","display_name":"Coprocessor","level":2,"score":0.5234788656234741},{"id":"https://openalex.org/C145108525","wikidata":"https://www.wikidata.org/wiki/Q656154","display_name":"Xeon","level":2,"score":0.48453134298324585},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4523972272872925},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.45163848996162415},{"id":"https://openalex.org/C81184566","wikidata":"https://www.wikidata.org/wiki/Q1191895","display_name":"Conjugate gradient method","level":2,"score":0.43129363656044006},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2904106378555298},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1177/1094342015593157","is_oa":false,"landing_page_url":"https://doi.org/10.1177/1094342015593157","pdf_url":null,"source":{"id":"https://openalex.org/S60606485","display_name":"The International Journal of High Performance Computing Applications","issn_l":"1094-3420","issn":["1094-3420","1741-2846"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The International Journal of High Performance Computing Applications","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure","score":0.4399999976158142}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W993511226","https://openalex.org/W1252105715","https://openalex.org/W1490374280","https://openalex.org/W1506342804","https://openalex.org/W1588915715","https://openalex.org/W1589783193","https://openalex.org/W1963564197","https://openalex.org/W1987840949","https://openalex.org/W1987896015","https://openalex.org/W1991173503","https://openalex.org/W2005073368","https://openalex.org/W2009196736","https://openalex.org/W2037699678","https://openalex.org/W2039789965","https://openalex.org/W2042879774","https://openalex.org/W2045175083","https://openalex.org/W2045618500","https://openalex.org/W2060803192","https://openalex.org/W2066943869","https://openalex.org/W2080090223","https://openalex.org/W2081008785","https://openalex.org/W2094171110","https://openalex.org/W2096714979","https://openalex.org/W2100799646","https://openalex.org/W2111784516","https://openalex.org/W2112397379","https://openalex.org/W2147743629","https://openalex.org/W2152914333","https://openalex.org/W2166604263","https://openalex.org/W2169583500","https://openalex.org/W2181846018","https://openalex.org/W2342261098","https://openalex.org/W3147460469","https://openalex.org/W4241696959","https://openalex.org/W4255188506"],"related_works":["https://openalex.org/W2085105049","https://openalex.org/W3203561460","https://openalex.org/W3009624197","https://openalex.org/W4251138667","https://openalex.org/W2682544458","https://openalex.org/W1969709731","https://openalex.org/W2030340070","https://openalex.org/W4239672454","https://openalex.org/W4388580994","https://openalex.org/W2951241120"],"abstract_inverted_index":{"This":[0,76],"paper":[1],"presents":[2],"optimizations":[3,186,247],"in":[4,34,45,83,152,216],"a":[5,117,133,258,264],"high-performance":[6],"conjugate":[7],"gradient":[8],"benchmark":[9,28],"(HPCG)":[10],"for":[11,95,156,208],"multi-core":[12],"Intel":[13],"\u00ae":[14,16],"Xeon":[15,20,86,153,182],"processors":[17,36,105],"and":[18,43,103,109,203,221],"many-core":[19],"Phi\u2122":[21],"coprocessors.":[22],"Without":[23],"careful":[24],"optimization,":[25],"the":[26,30,47,64,69,89,113,157,161,165,170,192,198,251],"HPCG":[27,189,252],"under-utilizes":[29],"compute":[31],"resources":[32],"available":[33],"modern":[35],"due":[37],"to":[38,62,140],"its":[39],"low":[40,65],"arithmetic":[41,66],"intensity":[42],"challenges":[44],"parallelizing":[46],"Gauss\u2013Seidel":[48],"smoother":[49],"(GS).":[50],"Our":[51,125,145,210],"optimized":[52,211],"implementation":[53,126,147,190,212],"fuses":[54],"GS":[55,129,146],"with":[56,116,142,149,180,218],"sparse":[57,96],"matrix":[58,97],"vector":[59],"multiplication":[60],"(SpMV)":[61],"address":[63,169],"intensity,":[67],"overcoming":[68],"performance":[70,115],"otherwise":[71],"bound":[72],"by":[73],"memory":[74,202],"bandwidth.":[75],"fusion":[77],"optimization":[78],"is":[79,255],"progressively":[80],"more":[81],"effective":[82],"newer":[84],"generation":[85],"processors,":[87],"demonstrating":[88],"usefulness":[90],"of":[91,112,160,197,237,267],"their":[92],"larger":[93],"caches":[94],"operations:":[98],"Sandy":[99],"Bridge,":[100,102],"Ivy":[101],"Haswell":[104],"achieve":[106],"93%,":[107],"99%,":[108],"103%,":[110],"respectively,":[111],"ideal":[114],"constraint":[118],"that":[119,135,194,224,245],"matrices":[120],"are":[121],"streamed":[122],"from":[123,191,200],"memory.":[124],"also":[127,263],"parallelizes":[128],"using":[130,173],"fine-grain":[131],"level-scheduling,":[132],"method":[134],"has":[136,213],"been":[137,214],"believed":[138],"not":[139,248],"scale":[141],"many":[143],"cores.":[144],"scales":[148],"60":[150],"cores":[151],"Phi":[154,183],"coprocessors,":[155],"finest":[158],"level":[159],"multi-grid":[162],"pre-conditioner.":[163],"At":[164],"coarser":[166],"levels,":[167],"we":[168,222,243],"limited":[171],"parallelism":[172],"block":[174],"multi-color":[175,206],"re-ordering,":[176],"achieving":[177],"21":[178],"GFLOPS":[179],"one":[181],"coprocessor.":[184],"These":[185],"distinguish":[187],"our":[188,246],"others":[193],"stream":[195],"most":[196],"data":[199],"main":[201],"rely":[204],"on":[205,257],"re-ordering":[207],"parallelism.":[209],"evaluated":[215],"clusters":[217],"various":[219],"configurations,":[220],"find":[223],"low-diameter":[225],"high-radix":[226],"network":[227],"topologies":[228],"such":[229],"as":[230],"Dragonfly":[231],"realize":[232],"high":[233],"parallelization":[234],"efficiencies":[235],"because":[236],"fast":[238],"all-reduce":[239],"collectives.":[240],"In":[241],"addition,":[242],"demonstrate":[244],"only":[249],"benefit":[250],"dataset,":[253],"which":[254],"based":[256],"structured":[259],"3D":[260],"grid,":[261],"but":[262],"wide":[265],"range":[266],"unstructured":[268],"matrices.":[269]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":4},{"year":2021,"cited_by_count":2},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
