{"id":"https://openalex.org/W2995914412","doi":"https://doi.org/10.1147/jrd.2019.2955944","title":"An open-source solution to performance portability for Summit and Sierra supercomputers","display_name":"An open-source solution to performance portability for Summit and Sierra supercomputers","publication_year":2019,"publication_date":"2019-12-10","ids":{"openalex":"https://openalex.org/W2995914412","doi":"https://doi.org/10.1147/jrd.2019.2955944","mag":"2995914412"},"language":"en","primary_location":{"id":"doi:10.1147/jrd.2019.2955944","is_oa":false,"landing_page_url":"https://doi.org/10.1147/jrd.2019.2955944","pdf_url":null,"source":{"id":"https://openalex.org/S4210219925","display_name":"IBM Journal of Research and Development","issn_l":"0018-8646","issn":["0018-8646","2151-8556"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320652","host_organization_name":"IBM","host_organization_lineage":["https://openalex.org/P4310320652"],"host_organization_lineage_names":["IBM"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IBM Journal of Research and Development","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5028956723","display_name":"Gheorghe-Teodor Bercea","orcid":"https://orcid.org/0000-0003-4331-4360"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"G. T. Bercea","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018762393","display_name":"Alexey Bataev","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"A. Bataev","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068763494","display_name":"Alexandre E. Eichenberger","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"A. E. Eichenberger","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053563062","display_name":"Carlo Bertolli","orcid":"https://orcid.org/0009-0006-6852-1445"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"C. Bertolli","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5041893114","display_name":"John K. P. O'Brien","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"J. K. O'Brien","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5028956723"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.4815,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.63396817,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":"64","issue":"3/4","first_page":"12:1","last_page":"12:23"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8817554116249084},{"id":"https://openalex.org/keywords/software-portability","display_name":"Software portability","score":0.8148826360702515},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.7261715531349182},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5963413119316101},{"id":"https://openalex.org/keywords/programming-paradigm","display_name":"Programming paradigm","score":0.5406050682067871},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.47917214035987854},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.4588715136051178},{"id":"https://openalex.org/keywords/generality","display_name":"Generality","score":0.4467334747314453},{"id":"https://openalex.org/keywords/runtime-system","display_name":"Runtime system","score":0.43224218487739563},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.4309292435646057},{"id":"https://openalex.org/keywords/x86","display_name":"x86","score":0.4255075454711914},{"id":"https://openalex.org/keywords/modular-design","display_name":"Modular design","score":0.41008636355400085},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3987123370170593},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.18273013830184937}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8817554116249084},{"id":"https://openalex.org/C63000827","wikidata":"https://www.wikidata.org/wiki/Q3080428","display_name":"Software portability","level":2,"score":0.8148826360702515},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.7261715531349182},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5963413119316101},{"id":"https://openalex.org/C34165917","wikidata":"https://www.wikidata.org/wiki/Q188267","display_name":"Programming paradigm","level":2,"score":0.5406050682067871},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.47917214035987854},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.4588715136051178},{"id":"https://openalex.org/C2780767217","wikidata":"https://www.wikidata.org/wiki/Q5532421","display_name":"Generality","level":2,"score":0.4467334747314453},{"id":"https://openalex.org/C2780870223","wikidata":"https://www.wikidata.org/wiki/Q1004415","display_name":"Runtime system","level":2,"score":0.43224218487739563},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.4309292435646057},{"id":"https://openalex.org/C170723468","wikidata":"https://www.wikidata.org/wiki/Q182933","display_name":"x86","level":3,"score":0.4255075454711914},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.41008636355400085},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3987123370170593},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.18273013830184937},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C542102704","wikidata":"https://www.wikidata.org/wiki/Q183257","display_name":"Psychotherapist","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1147/jrd.2019.2955944","is_oa":false,"landing_page_url":"https://doi.org/10.1147/jrd.2019.2955944","pdf_url":null,"source":{"id":"https://openalex.org/S4210219925","display_name":"IBM Journal of Research and Development","issn_l":"0018-8646","issn":["0018-8646","2151-8556"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320652","host_organization_name":"IBM","host_organization_lineage":["https://openalex.org/P4310320652"],"host_organization_lineage_names":["IBM"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IBM Journal of Research and Development","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W1567366217","https://openalex.org/W1977684034","https://openalex.org/W2018408367","https://openalex.org/W2019143817","https://openalex.org/W2038646072","https://openalex.org/W2056862683","https://openalex.org/W2078794610","https://openalex.org/W2098426571","https://openalex.org/W2118880662","https://openalex.org/W2126026097","https://openalex.org/W2127013169","https://openalex.org/W2142427060","https://openalex.org/W2183420951","https://openalex.org/W2211541975","https://openalex.org/W2267470509","https://openalex.org/W2294178969","https://openalex.org/W2301898438","https://openalex.org/W2521614036","https://openalex.org/W2767144262","https://openalex.org/W2786644468","https://openalex.org/W2885571639","https://openalex.org/W2890156445","https://openalex.org/W3105806596","https://openalex.org/W4234180294","https://openalex.org/W4240822085","https://openalex.org/W4245654886","https://openalex.org/W4247863936","https://openalex.org/W6678848862","https://openalex.org/W6686093555","https://openalex.org/W6688460505"],"related_works":["https://openalex.org/W3020739840","https://openalex.org/W4386875822","https://openalex.org/W3177128669","https://openalex.org/W2122078011","https://openalex.org/W4385574943","https://openalex.org/W1506346046","https://openalex.org/W2022941486","https://openalex.org/W3004785956","https://openalex.org/W2111416043","https://openalex.org/W4200226193"],"abstract_inverted_index":{"Programming":[0],"models":[1],"that":[2,112,127,184,197],"use":[3],"a":[4,39,66,88,122,135],"higher":[5],"level":[6],"of":[7,38,83,142,174,198],"abstraction":[8],"to":[9,104,138,149],"express":[10],"parallelism":[11],"can":[12],"target":[13],"both":[14,93],"CPUs":[15],"and":[16,23,36,52,61,96,109,134,179,193],"any":[17],"attached":[18],"devices,":[19],"alleviating":[20],"the":[21,33,50,56,70,81,84,114,118,129,140,147,191],"maintainability":[22],"portability":[24],"concerns":[25],"facing":[26],"today's":[27],"heterogenous":[28],"systems.":[29],"This":[30],"article":[31],"describes":[32],"design,":[34],"implementation,":[35],"delivery":[37],"compliant":[40],"OpenMP":[41,62,85,115],"device":[42],"offloading":[43],"implementation":[44,148,160],"for":[45,76,155,185],"IBM-NVIDIA":[46],"heterogeneous":[47],"servers":[48],"composing":[49],"Summit":[51,192],"Sierra":[53,194],"supercomputers":[54],"in":[55],"mainline":[57],"open-source":[58],"Clang/LLVM":[59],"compiler":[60],"runtime":[63,175,180],"projects.":[64],"From":[65],"performance":[67,95,189],"perspective,":[68],"reconciling":[69],"GPU":[71,130],"programming":[72,102],"model,":[73],"best":[74],"suited":[75],"massively":[77],"parallel":[78],"workloads,":[79],"with":[80,152],"generality":[82,141],"model":[86,133],"was":[87],"significant":[89],"challenge.":[90],"To":[91],"achieve":[92],"high":[94],"full":[97],"portability,":[98],"we":[99,120],"map":[100],"high-level":[101],"patterns":[103],"fine-tuned":[105],"code":[106],"generation":[107],"schemes":[108,154],"customized":[110],"runtimes":[111],"preserve":[113],"semantics.":[116],"In":[117],"compiler,":[119],"implement":[121],"low-overhead":[123],"single-program":[124],"multiple-data":[125],"scheme":[126,137],"leverages":[128],"native":[131,200],"execution":[132],"fallback":[136],"support":[139],"OpenMP.":[143],"Modular":[144],"design":[145],"enables":[146],"be":[150],"extended":[151],"new":[153],"frequently":[156],"occurring":[157],"patterns.":[158],"Our":[159],"relies":[161],"on":[162,190],"key":[163],"optimizations:":[164],"sharing":[165],"data":[166],"among":[167],"threads,":[168],"leveraging":[169],"unified":[170],"memory,":[171],"aggressive":[172],"inlining":[173],"calls,":[176],"memory":[177],"coalescing,":[178],"simplification.":[181],"We":[182],"show":[183],"commonly":[186],"used":[187],"patterns,":[188],"GPUs":[195],"matches":[196],"hand-written":[199],"CUDA":[201],"code.":[202]},"counts_by_year":[{"year":2020,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
