{"id":"https://openalex.org/W3048271386","doi":"https://doi.org/10.1109/tpds.2020.3015148","title":"Multi-GPU Parallelization of the NAS Multi-Zone Parallel Benchmarks","display_name":"Multi-GPU Parallelization of the NAS Multi-Zone Parallel Benchmarks","publication_year":2020,"publication_date":"2020-08-07","ids":{"openalex":"https://openalex.org/W3048271386","doi":"https://doi.org/10.1109/tpds.2020.3015148","mag":"3048271386"},"language":"en","primary_location":{"id":"doi:10.1109/tpds.2020.3015148","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2020.3015148","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://hdl.handle.net/2117/334984","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036427962","display_name":"Marc Gonz\u00e1lez","orcid":"https://orcid.org/0000-0002-3780-1106"},"institutions":[{"id":"https://openalex.org/I9617848","display_name":"Universitat Polit\u00e8cnica de Catalunya","ror":"https://ror.org/03mb6wj31","country_code":"ES","type":"education","lineage":["https://openalex.org/I9617848"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Marc Gonzalez","raw_affiliation_strings":["Computer Architecture Department, Universitat Polit\u00e8cnica de Catalunya - Barcelona Tech, Barcelona, Spain"],"raw_orcid":"https://orcid.org/0000-0002-3780-1106","affiliations":[{"raw_affiliation_string":"Computer Architecture Department, Universitat Polit\u00e8cnica de Catalunya - Barcelona Tech, Barcelona, Spain","institution_ids":["https://openalex.org/I9617848"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5000717476","display_name":"Enric Morancho","orcid":"https://orcid.org/0000-0003-2403-8145"},"institutions":[{"id":"https://openalex.org/I9617848","display_name":"Universitat Polit\u00e8cnica de Catalunya","ror":"https://ror.org/03mb6wj31","country_code":"ES","type":"education","lineage":["https://openalex.org/I9617848"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Enric Morancho","raw_affiliation_strings":["Computer Architecture Department, Universitat Polit\u00e8cnica de Catalunya - Barcelona Tech, Barcelona, Spain"],"raw_orcid":"https://orcid.org/0000-0003-2403-8145","affiliations":[{"raw_affiliation_string":"Computer Architecture Department, Universitat Polit\u00e8cnica de Catalunya - Barcelona Tech, Barcelona, Spain","institution_ids":["https://openalex.org/I9617848"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.7081,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.68234963,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"32","issue":"1","first_page":"229","last_page":"241"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9140117168426514},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7616039514541626},{"id":"https://openalex.org/keywords/ibm","display_name":"IBM","score":0.6439822316169739},{"id":"https://openalex.org/keywords/workload","display_name":"Workload","score":0.597453773021698},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.5578137636184692},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.507072925567627},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.32466620206832886},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.139031320810318}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9140117168426514},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7616039514541626},{"id":"https://openalex.org/C70388272","wikidata":"https://www.wikidata.org/wiki/Q5968558","display_name":"IBM","level":2,"score":0.6439822316169739},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.597453773021698},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.5578137636184692},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.507072925567627},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.32466620206832886},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.139031320810318},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C171250308","wikidata":"https://www.wikidata.org/wiki/Q11468","display_name":"Nanotechnology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tpds.2020.3015148","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2020.3015148","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},{"id":"pmh:oai:upcommons.upc.edu:2117/334984","is_oa":true,"landing_page_url":"http://hdl.handle.net/2117/334984","pdf_url":null,"source":{"id":"https://openalex.org/S4377196262","display_name":"UPCommons institutional repository (Universitat Polit\u00e8cnica de Catalunya)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I9617848","host_organization_name":"Universitat Polit\u00e8cnica de Catalunya","host_organization_lineage":["https://openalex.org/I9617848"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"pmh:oai:upcommons.upc.edu:2117/334984","is_oa":true,"landing_page_url":"http://hdl.handle.net/2117/334984","pdf_url":null,"source":{"id":"https://openalex.org/S4377196262","display_name":"UPCommons institutional repository (Universitat Polit\u00e8cnica de Catalunya)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I9617848","host_organization_name":"Universitat Polit\u00e8cnica de Catalunya","host_organization_lineage":["https://openalex.org/I9617848"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2436365243","display_name":null,"funder_award_id":"2014-SGR-1051","funder_id":"https://openalex.org/F4320321505","funder_display_name":"Generalitat de Catalunya"},{"id":"https://openalex.org/G7096924396","display_name":null,"funder_award_id":"TIN2015-65316","funder_id":"https://openalex.org/F4320321505","funder_display_name":"Generalitat de Catalunya"},{"id":"https://openalex.org/G7291222407","display_name":null,"funder_award_id":"TIN2015-65316","funder_id":"https://openalex.org/F4320323737","funder_display_name":"Ministerio de Ciencia y Tecnolog\u00eda"},{"id":"https://openalex.org/G7441335220","display_name":null,"funder_award_id":"TIN2015-65316-P","funder_id":"https://openalex.org/F4320323737","funder_display_name":"Ministerio de Ciencia y Tecnolog\u00eda"}],"funders":[{"id":"https://openalex.org/F4320321505","display_name":"Generalitat de Catalunya","ror":"https://ror.org/01bg62x04"},{"id":"https://openalex.org/F4320323737","display_name":"Ministerio de Ciencia y Tecnolog\u00eda","ror":"https://ror.org/034900433"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W782703657","https://openalex.org/W787130922","https://openalex.org/W1546315506","https://openalex.org/W1887586770","https://openalex.org/W1968143987","https://openalex.org/W1972906829","https://openalex.org/W2011652244","https://openalex.org/W2024639384","https://openalex.org/W2037574360","https://openalex.org/W2067280503","https://openalex.org/W2073593865","https://openalex.org/W2083734282","https://openalex.org/W2086500526","https://openalex.org/W2097686029","https://openalex.org/W2111639258","https://openalex.org/W2112121929","https://openalex.org/W2118717320","https://openalex.org/W2135644880","https://openalex.org/W2140438747","https://openalex.org/W2150303942","https://openalex.org/W2152129144","https://openalex.org/W2271840356","https://openalex.org/W2408329068","https://openalex.org/W2505968561","https://openalex.org/W2584314560","https://openalex.org/W4247103459","https://openalex.org/W4250908905","https://openalex.org/W6622661583","https://openalex.org/W6632597477","https://openalex.org/W6680930050","https://openalex.org/W6694517276","https://openalex.org/W6713665523"],"related_works":["https://openalex.org/W17155033","https://openalex.org/W3207760230","https://openalex.org/W1496222301","https://openalex.org/W1590307681","https://openalex.org/W4312814274","https://openalex.org/W4285370786","https://openalex.org/W2296488620","https://openalex.org/W2358353312","https://openalex.org/W2251285835","https://openalex.org/W1599154403"],"abstract_inverted_index":{"GPU-based":[0],"computing":[1],"systems":[2],"have":[3,15],"become":[4],"a":[5,38,79,104,176],"widely":[6],"accepted":[7],"solution":[8],"for":[9],"the":[10,31,43,55,62,65,89,129,132,147,148,152,158,183,246,249,255,258],"high-performance-computing":[11],"(HPC)":[12],"domain.":[13],"GPUs":[14,261],"shown":[16],"highly":[17,140],"competitive":[18],"performance-per-watt":[19],"ratios":[20],"and":[21,74,84,116,171,212,231,251],"can":[22],"exploit":[23],"an":[24,193,202],"astonishing":[25],"level":[26,272],"of":[27,34,45,49,92,107,151,206,245,254,260],"parallelism.":[28],"However,":[29],"exploiting":[30],"peak":[32],"performance":[33,91,138],"such":[35],"devices":[36,71],"is":[37,139,262],"challenge,":[39],"mainly":[40],"due":[41],"to":[42,126,142,174,181,227,234],"combination":[44],"two":[46],"essential":[47],"aspects":[48],"multi-GPU":[50,97,105,220],"execution.":[51],"On":[52,64,201],"one":[53],"hand,":[54,67],"workload":[56,130],"should":[57,75],"be":[58,76],"distributed":[59],"evenly":[60,127],"among":[61,131],"GPUs.":[63,133],"other":[66],"communications":[68],"between":[69,81,178,248],"GPU":[70,215],"are":[72,154],"costly":[73],"minimized.":[77],"Therefore,":[78],"trade-of":[80],"work-distribution":[82,122,159],"schemes":[83,160],"communication":[85,115,149,252],"overheads":[86],"will":[87],"condition":[88],"overall":[90,184],"parallel":[93],"applications":[94,153,256],"run":[95],"on":[96],"systems.":[98],"In":[99,165,186],"this":[100,143],"article":[101,239],"we":[102,167,188],"present":[103],"implementation":[106],"NAS":[108],"Multi-Zone":[109],"Parallel":[110],"Benchmarks":[111],"(which":[112],"execution":[113,224],"alternate":[114],"computational":[117,163,250],"phases).":[118],"We":[119,264],"propose":[120],"several":[121],"strategies":[123],"that":[124,137,266],"try":[125],"distribute":[128],"Our":[134],"evaluations":[135],"show":[136],"sensitive":[141],"distribution":[144],"strategy,":[145],"as":[146,257,273],"phases":[150,180,253],"heavily":[155],"affected":[156],"by":[157],"applied":[161],"in":[162,243],"phases.":[164],"particular,":[166],"consider":[168],"Static,":[169],"Dynamic,":[170],"Guided":[172,267],"schedulers":[173,191,268],"find":[175],"trade-off":[177],"both":[179],"maximize":[182],"performance.":[185],"addition,":[187],"compare":[189],"those":[190],"with":[192],"optimal":[194,274],"scheduler":[195],"computed":[196],"offline":[197],"using":[198],"IBM":[199,209],"CPLEX.":[200],"evaluation":[203],"environment":[204],"composed":[205],"2":[207],"x":[208,214],"Power9":[210],"8335-GTH":[211],"4":[213],"NVIDIA":[216],"V100":[217],"(Volta),":[218],"our":[219],"parallelization":[221],"outperforms":[222],"single-GPU":[223],"from":[225,232],"1.48x":[226],"1.86x":[228],"(2":[229],"GPUs)":[230],"1.75x":[233],"3.54x":[235],"(4":[236],"GPUs).":[237],"This":[238],"analyses":[240],"these":[241],"improvements":[242],"terms":[244],"relationship":[247],"number":[259],"increased.":[263],"prove":[265],"perform":[269],"at":[270],"similar":[271],"schedulers.":[275]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
