{"id":"https://openalex.org/W2107406335","doi":"https://doi.org/10.1177/1094342014552086","title":"Optimization of MPI collective operations on the IBM Blue Gene/Q supercomputer","display_name":"Optimization of MPI collective operations on the IBM Blue Gene/Q supercomputer","publication_year":2014,"publication_date":"2014-11-01","ids":{"openalex":"https://openalex.org/W2107406335","doi":"https://doi.org/10.1177/1094342014552086","mag":"2107406335"},"language":"en","primary_location":{"id":"doi:10.1177/1094342014552086","is_oa":false,"landing_page_url":"https://doi.org/10.1177/1094342014552086","pdf_url":null,"source":{"id":"https://openalex.org/S60606485","display_name":"The International Journal of High Performance Computing Applications","issn_l":"1094-3420","issn":["1094-3420","1741-2846"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The International Journal of High Performance Computing Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5070117771","display_name":"Sameer Kumar","orcid":"https://orcid.org/0000-0001-8697-7370"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]},{"id":"https://openalex.org/I4210129961","display_name":"IBM (India)","ror":"https://ror.org/034ahpr11","country_code":"IN","type":"company","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210129961"]}],"countries":["IN","US"],"is_corresponding":true,"raw_author_name":"Sameer Kumar","raw_affiliation_strings":["IBM India Research Center, Nagawara, Bangalore, India","IBM India Research Center, Nagawara, Bangalore, India#TAB#"],"affiliations":[{"raw_affiliation_string":"IBM India Research Center, Nagawara, Bangalore, India","institution_ids":["https://openalex.org/I4210129961"]},{"raw_affiliation_string":"IBM India Research Center, Nagawara, Bangalore, India#TAB#","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109965519","display_name":"Amith R. Mamidala","orcid":null},"institutions":[{"id":"https://openalex.org/I4210129961","display_name":"IBM (India)","ror":"https://ror.org/034ahpr11","country_code":"IN","type":"company","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210129961"]},{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["IN","US"],"is_corresponding":false,"raw_author_name":"Amith Mamidala","raw_affiliation_strings":["IBM India Research Center, Nagawara, Bangalore, India","IBM India Research Center, Nagawara, Bangalore, India#TAB#"],"affiliations":[{"raw_affiliation_string":"IBM India Research Center, Nagawara, Bangalore, India","institution_ids":["https://openalex.org/I4210129961"]},{"raw_affiliation_string":"IBM India Research Center, Nagawara, Bangalore, India#TAB#","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062754428","display_name":"Philip Heidelberger","orcid":null},"institutions":[{"id":"https://openalex.org/I4210129961","display_name":"IBM (India)","ror":"https://ror.org/034ahpr11","country_code":"IN","type":"company","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210129961"]},{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["IN","US"],"is_corresponding":false,"raw_author_name":"Philip Heidelberger","raw_affiliation_strings":["IBM India Research Center, Nagawara, Bangalore, India","IBM India Research Center, Nagawara, Bangalore, India#TAB#"],"affiliations":[{"raw_affiliation_string":"IBM India Research Center, Nagawara, Bangalore, India","institution_ids":["https://openalex.org/I4210129961"]},{"raw_affiliation_string":"IBM India Research Center, Nagawara, Bangalore, India#TAB#","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100319457","display_name":"Dong Chen","orcid":"https://orcid.org/0000-0002-2209-2183"},"institutions":[{"id":"https://openalex.org/I4210129961","display_name":"IBM (India)","ror":"https://ror.org/034ahpr11","country_code":"IN","type":"company","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210129961"]},{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["IN","US"],"is_corresponding":false,"raw_author_name":"Dong Chen","raw_affiliation_strings":["IBM India Research Center, Nagawara, Bangalore, India","IBM India Research Center, Nagawara, Bangalore, India#TAB#"],"affiliations":[{"raw_affiliation_string":"IBM India Research Center, Nagawara, Bangalore, India","institution_ids":["https://openalex.org/I4210129961"]},{"raw_affiliation_string":"IBM India Research Center, Nagawara, Bangalore, India#TAB#","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5062250655","display_name":"Daniel A. Faraj","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Daniel Faraj","raw_affiliation_strings":["Intel Technical Computing Group, Edina, MN, USA","Intel Technical Computing Group, Edina, MN, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"Intel Technical Computing Group, Edina, MN, USA","institution_ids":["https://openalex.org/I1343180700"]},{"raw_affiliation_string":"Intel Technical Computing Group, Edina, MN, USA#TAB#","institution_ids":["https://openalex.org/I1343180700"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5070117771"],"corresponding_institution_ids":["https://openalex.org/I1341412227","https://openalex.org/I4210129961"],"apc_list":null,"apc_paid":null,"fwci":3.3713,"has_fulltext":false,"cited_by_count":30,"citation_normalized_percentile":{"value":0.92602448,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"28","issue":"4","first_page":"450","last_page":"464"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8424088954925537},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.810246467590332},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.7989697456359863},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.722009539604187},{"id":"https://openalex.org/keywords/massively-parallel","display_name":"Massively parallel","score":0.7139756679534912},{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.6807733178138733},{"id":"https://openalex.org/keywords/ibm","display_name":"IBM","score":0.5896023511886597},{"id":"https://openalex.org/keywords/message-passing","display_name":"Message passing","score":0.5040010809898376},{"id":"https://openalex.org/keywords/network-packet","display_name":"Network packet","score":0.47210565209388733},{"id":"https://openalex.org/keywords/programming-paradigm","display_name":"Programming paradigm","score":0.445158988237381},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.4450303912162781},{"id":"https://openalex.org/keywords/grid-network","display_name":"Grid network","score":0.4221140146255493},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.41558289527893066},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.4140244722366333},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.3149907886981964},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.21429792046546936},{"id":"https://openalex.org/keywords/grid","display_name":"Grid","score":0.13878333568572998},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.07314488291740417},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.07003045082092285}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8424088954925537},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.810246467590332},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.7989697456359863},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.722009539604187},{"id":"https://openalex.org/C190475519","wikidata":"https://www.wikidata.org/wiki/Q544384","display_name":"Massively parallel","level":2,"score":0.7139756679534912},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.6807733178138733},{"id":"https://openalex.org/C70388272","wikidata":"https://www.wikidata.org/wiki/Q5968558","display_name":"IBM","level":2,"score":0.5896023511886597},{"id":"https://openalex.org/C854659","wikidata":"https://www.wikidata.org/wiki/Q1859284","display_name":"Message passing","level":2,"score":0.5040010809898376},{"id":"https://openalex.org/C158379750","wikidata":"https://www.wikidata.org/wiki/Q214111","display_name":"Network packet","level":2,"score":0.47210565209388733},{"id":"https://openalex.org/C34165917","wikidata":"https://www.wikidata.org/wiki/Q188267","display_name":"Programming paradigm","level":2,"score":0.445158988237381},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.4450303912162781},{"id":"https://openalex.org/C24337046","wikidata":"https://www.wikidata.org/wiki/Q4394138","display_name":"Grid network","level":3,"score":0.4221140146255493},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.41558289527893066},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.4140244722366333},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.3149907886981964},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.21429792046546936},{"id":"https://openalex.org/C187691185","wikidata":"https://www.wikidata.org/wiki/Q2020720","display_name":"Grid","level":2,"score":0.13878333568572998},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.07314488291740417},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.07003045082092285},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1177/1094342014552086","is_oa":false,"landing_page_url":"https://doi.org/10.1177/1094342014552086","pdf_url":null,"source":{"id":"https://openalex.org/S60606485","display_name":"The International Journal of High Performance Computing Applications","issn_l":"1094-3420","issn":["1094-3420","1741-2846"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The International Journal of High Performance Computing Applications","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320338286","display_name":"Lawrence Livermore National Laboratory","ror":"https://ror.org/041nk4h53"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W1519646686","https://openalex.org/W1861151562","https://openalex.org/W1944805600","https://openalex.org/W1980670496","https://openalex.org/W1987117608","https://openalex.org/W1987437824","https://openalex.org/W1991732708","https://openalex.org/W2009852231","https://openalex.org/W2014471579","https://openalex.org/W2035079906","https://openalex.org/W2059531889","https://openalex.org/W2061388644","https://openalex.org/W2081612620","https://openalex.org/W2083150715","https://openalex.org/W2084557272","https://openalex.org/W2087124296","https://openalex.org/W2087266442","https://openalex.org/W2103393368","https://openalex.org/W2113290275","https://openalex.org/W2115020004","https://openalex.org/W2123287621","https://openalex.org/W2123859357","https://openalex.org/W2131613942","https://openalex.org/W2138690674","https://openalex.org/W2143425413","https://openalex.org/W2158452341","https://openalex.org/W2163967521","https://openalex.org/W2164945803","https://openalex.org/W2165808215","https://openalex.org/W2198690661","https://openalex.org/W3021029666","https://openalex.org/W4239059712","https://openalex.org/W4245836145"],"related_works":["https://openalex.org/W2255048617","https://openalex.org/W2092102951","https://openalex.org/W1512604874","https://openalex.org/W2140636994","https://openalex.org/W2142204081","https://openalex.org/W2158982898","https://openalex.org/W1784407820","https://openalex.org/W2883786246","https://openalex.org/W1975707871","https://openalex.org/W2000174080"],"abstract_inverted_index":{"The":[0,131],"Blue":[1],"Gene/Q":[2],"(BG/Q)":[3],"machine":[4],"is":[5,135],"the":[6,9,70,74,105,108,114,119,127],"latest":[7],"in":[8,113],"line":[10],"of":[11,69,73,88,98,107,139,150],"IBM":[12],"massively":[13],"parallel":[14],"supercomputers,":[15],"designed":[16],"to":[17,19,48,61],"scale":[18],"262,144":[20],"nodes":[21,41],"and":[22,42,77,117],"16":[23],"million":[24],"threads.":[25,32],"Each":[26],"BG/Q":[27,75,115],"node":[28],"has":[29],"68":[30],"hardware":[31],"Hybrid":[33],"programming":[34],"paradigms,":[35],"which":[36],"use":[37,106],"message":[38],"passing":[39],"among":[40],"multi-threading":[43],"within":[44],"nodes,":[45],"enable":[46],"applications":[47],"achieve":[49,81,147],"high":[50],"throughput":[51,138],"on":[52,91,121],"BG/Q.":[53],"In":[54],"this":[55],"paper,":[56],"we":[57],"present":[58],"scalable":[59],"algorithms":[60],"optimize":[62],"MPI":[63,93,157],"collective":[64,78],"operations":[65],"by":[66,104,126],"taking":[67],"advantage":[68],"various":[71],"features":[72],"torus":[76],"networks.":[79],"We":[80,95,145],"an":[82],"8":[83],"byte":[84],"double-sum":[85,143],"MPI_Allreduce":[86],"latency":[87],"10.25":[89],"ms":[90],"1,572,864":[92],"ranks.":[94,158],"accelerate":[96],"summing":[97],"network":[99,151],"packets":[100],"with":[101,155],"local":[102],"buffers":[103],"Quad":[109],"Processing":[110],"SIMD":[111],"unit":[112],"cores":[116],"executing":[118],"sums":[120],"multiple":[122],"communication":[123,129],"threads":[124],"supported":[125],"optimized":[128],"libraries.":[130],"achieved":[132],"net":[133],"gain":[134],"a":[136],"peak":[137,152],"6.3":[140],"GB/s":[141],"for":[142,153],"allreduce.":[144],"also":[146],"over":[148],"90%":[149],"MPI_Alltoall":[154],"65,536":[156]},"counts_by_year":[{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":4},{"year":2016,"cited_by_count":3},{"year":2015,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
