{"id":"https://openalex.org/W2550750604","doi":"https://doi.org/10.1177/1094342016672720","title":"Reaching bandwidth saturation using transparent injection parallelization","display_name":"Reaching bandwidth saturation using transparent injection parallelization","publication_year":2016,"publication_date":"2016-11-08","ids":{"openalex":"https://openalex.org/W2550750604","doi":"https://doi.org/10.1177/1094342016672720","mag":"2550750604"},"language":"en","primary_location":{"id":"doi:10.1177/1094342016672720","is_oa":false,"landing_page_url":"https://doi.org/10.1177/1094342016672720","pdf_url":null,"source":{"id":"https://openalex.org/S60606485","display_name":"The International Journal of High Performance Computing Applications","issn_l":"1094-3420","issn":["1094-3420","1741-2846"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The International Journal of High Performance Computing Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://www.osti.gov/biblio/1437694","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5013775399","display_name":"Nicholas Chaimov","orcid":"https://orcid.org/0000-0001-7807-7620"},"institutions":[{"id":"https://openalex.org/I181233156","display_name":"University of Oregon","ror":"https://ror.org/0293rh119","country_code":"US","type":"education","lineage":["https://openalex.org/I181233156"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Nicholas Chaimov","raw_affiliation_strings":["University of Oregon, OR, USA"],"affiliations":[{"raw_affiliation_string":"University of Oregon, OR, USA","institution_ids":["https://openalex.org/I181233156"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062016419","display_name":"Khaled Z. Ibrahim","orcid":"https://orcid.org/0009-0004-5362-3612"},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Khaled Z Ibrahim","raw_affiliation_strings":["Lawrence Berkeley National Laboratory, CA, USA"],"affiliations":[{"raw_affiliation_string":"Lawrence Berkeley National Laboratory, CA, USA","institution_ids":["https://openalex.org/I148283060"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102746800","display_name":"Samuel Williams","orcid":"https://orcid.org/0000-0002-8327-5717"},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Samuel Williams","raw_affiliation_strings":["Lawrence Berkeley National Laboratory, CA, USA"],"affiliations":[{"raw_affiliation_string":"Lawrence Berkeley National Laboratory, CA, USA","institution_ids":["https://openalex.org/I148283060"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023233400","display_name":"Costin Iancu","orcid":"https://orcid.org/0000-0001-7845-2427"},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Costin Iancu","raw_affiliation_strings":["Lawrence Berkeley National Laboratory, CA, USA"],"affiliations":[{"raw_affiliation_string":"Lawrence Berkeley National Laboratory, CA, USA","institution_ids":["https://openalex.org/I148283060"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5013775399"],"corresponding_institution_ids":["https://openalex.org/I181233156"],"apc_list":null,"apc_paid":null,"fwci":0.3218,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.6012129,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"31","issue":"5","first_page":"405","last_page":"421"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.9368979334831238},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8605740070343018},{"id":"https://openalex.org/keywords/infiniband","display_name":"InfiniBand","score":0.7954028844833374},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7553405165672302},{"id":"https://openalex.org/keywords/concurrency","display_name":"Concurrency","score":0.6537324786186218},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.5041686296463013},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.4915808141231537},{"id":"https://openalex.org/keywords/multigrid-method","display_name":"Multigrid method","score":0.48522520065307617},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.4173157811164856},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.2734237611293793},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.1105460524559021}],"concepts":[{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.9368979334831238},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8605740070343018},{"id":"https://openalex.org/C2781030343","wikidata":"https://www.wikidata.org/wiki/Q922437","display_name":"InfiniBand","level":2,"score":0.7954028844833374},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7553405165672302},{"id":"https://openalex.org/C193702766","wikidata":"https://www.wikidata.org/wiki/Q1414548","display_name":"Concurrency","level":2,"score":0.6537324786186218},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.5041686296463013},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.4915808141231537},{"id":"https://openalex.org/C137119250","wikidata":"https://www.wikidata.org/wiki/Q1413101","display_name":"Multigrid method","level":3,"score":0.48522520065307617},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.4173157811164856},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.2734237611293793},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.1105460524559021},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C93779851","wikidata":"https://www.wikidata.org/wiki/Q271977","display_name":"Partial differential equation","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1177/1094342016672720","is_oa":false,"landing_page_url":"https://doi.org/10.1177/1094342016672720","pdf_url":null,"source":{"id":"https://openalex.org/S60606485","display_name":"The International Journal of High Performance Computing Applications","issn_l":"1094-3420","issn":["1094-3420","1741-2846"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The International Journal of High Performance Computing Applications","raw_type":"journal-article"},{"id":"pmh:oai:osti.gov:1437694","is_oa":true,"landing_page_url":"https://www.osti.gov/biblio/1437694","pdf_url":null,"source":{"id":"https://openalex.org/S4306402487","display_name":"OSTI OAI (U.S. Department of Energy Office of Scientific and Technical Information)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I139351228","host_organization_name":"Office of Scientific and Technical Information","host_organization_lineage":["https://openalex.org/I139351228"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null}],"best_oa_location":{"id":"pmh:oai:osti.gov:1437694","is_oa":true,"landing_page_url":"https://www.osti.gov/biblio/1437694","pdf_url":null,"source":{"id":"https://openalex.org/S4306402487","display_name":"OSTI OAI (U.S. Department of Energy Office of Scientific and Technical Information)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I139351228","host_organization_name":"Office of Scientific and Technical Information","host_organization_lineage":["https://openalex.org/I139351228"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1489425746","display_name":null,"funder_award_id":"DE-AC05-00OR22725","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G1977829796","display_name":null,"funder_award_id":"DE-AC02-05-CH-11231","funder_id":"https://openalex.org/F4320337506","funder_display_name":"Advanced Scientific Computing Research"}],"funders":[{"id":"https://openalex.org/F4320332359","display_name":"Office of Science","ror":"https://ror.org/00mmn6b08"},{"id":"https://openalex.org/F4320337506","display_name":"Advanced Scientific Computing Research","ror":"https://ror.org/0012c7r22"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W313895472","https://openalex.org/W338128962","https://openalex.org/W1579891937","https://openalex.org/W1861151562","https://openalex.org/W1975138713","https://openalex.org/W1975372360","https://openalex.org/W1983545421","https://openalex.org/W1985933012","https://openalex.org/W2022263640","https://openalex.org/W2034714579","https://openalex.org/W2063123664","https://openalex.org/W2076028817","https://openalex.org/W2097718502","https://openalex.org/W2102182691","https://openalex.org/W2102298809","https://openalex.org/W2109065830","https://openalex.org/W2121404908","https://openalex.org/W2131613942","https://openalex.org/W2138723837","https://openalex.org/W2138782497","https://openalex.org/W2144607581","https://openalex.org/W2150511820","https://openalex.org/W2157664430","https://openalex.org/W2163552442","https://openalex.org/W4231725297","https://openalex.org/W4237371203","https://openalex.org/W4241696959","https://openalex.org/W4253621724"],"related_works":["https://openalex.org/W2406331183","https://openalex.org/W2363988059","https://openalex.org/W2792231649","https://openalex.org/W2053215237","https://openalex.org/W1939358748","https://openalex.org/W1995426833","https://openalex.org/W2613555492","https://openalex.org/W3150273738","https://openalex.org/W2141107779","https://openalex.org/W2983282793"],"abstract_inverted_index":{"Although":[0],"logically":[1],"available,":[2],"applications":[3],"may":[4],"not":[5],"exploit":[6],"enough":[7],"instantaneous":[8,48],"communication":[9,68,76],"concurrency":[10,50,101],"to":[11,44,52,74,85],"maximize":[12],"network":[13,49,87],"utilization":[14],"on":[15,120,132,155,172,199],"HPC":[16],"systems.":[17],"This":[18,150],"is":[19],"exacerbated":[20],"in":[21],"hybrid":[22,184],"programming":[23],"models":[24,84],"that":[25],"combine":[26],"single":[27],"program":[28],"multiple":[29,75],"data":[30],"with":[31],"OpenMP":[32],"or":[33],"CUDA.":[34],"We":[35,165],"present":[36],"the":[37,47,59,92,187],"design":[38],"of":[39,58],"a":[40,159],"\u201cmulti-threaded\u201d":[41],"runtime":[42,66],"able":[43],"transparently":[45],"increase":[46],"and":[51,62,96,109,122],"provide":[53],"near":[54],"saturation":[55],"bandwidth,":[56],"independent":[57],"application":[60,71,98,182],"configuration":[61],"dynamic":[63],"behavior.":[64],"The":[65,78],"offloads":[67],"requests":[69],"from":[70],"level":[72,99],"tasks":[73],"servers.":[77],"servers":[79],"use":[80],"system":[81],"specific":[82],"performance":[83],"attain":[86],"saturation.":[88],"Our":[89],"techniques":[90],"alleviate":[91],"need":[93],"for":[94,116,128,158,175],"spatial":[95],"temporal":[97],"message":[100,107],"optimizations.":[102],"Experimental":[103],"results":[104],"show":[105],"improved":[106],"throughput":[108],"bandwidth":[110],"by":[111,123],"as":[112,114,124,126,140,145,147,167,169,194,196],"much":[113,125,146,168,195],"150%":[115],"4":[117,129],"KB":[118,130],"messages":[119,131],"InfiniBand":[121],"120%":[127],"Cray":[133],"Aries.":[134],"For":[135,186],"more":[136],"complex":[137],"operations":[138],"such":[139],"all-to-all":[141],"collectives,":[142],"we":[143,192],"observe":[144,166,193],"30%":[148],"speedup.":[149],"translates":[151],"into":[152],"23%":[153],"speedup":[154,171,198],"12,288":[156],"cores":[157,174],"NAS":[160],"FT":[161],"implemented":[162],"using":[163,183],"FFTW.":[164],"76%":[170],"1500":[173],"an":[176],"already":[177],"optimized":[178],"UPC+OpenMP":[179],"geometric":[180,188],"multigrid":[181,189],"parallelism.":[185],"GPU":[190],"implementation,":[191],"44%":[197],"512":[200],"GPUs.":[201]},"counts_by_year":[{"year":2017,"cited_by_count":1}],"updated_date":"2026-03-17T09:09:15.849793","created_date":"2025-10-10T00:00:00"}
