{"id":"https://openalex.org/W7116293487","doi":"https://doi.org/10.1145/3754598.3754649","title":"One GPU, Many Ranks: Enabling Performance and Energy-Efficient In-Transit Visualization via Resource Sharing","display_name":"One GPU, Many Ranks: Enabling Performance and Energy-Efficient In-Transit Visualization via Resource Sharing","publication_year":2025,"publication_date":"2025-09-08","ids":{"openalex":"https://openalex.org/W7116293487","doi":"https://doi.org/10.1145/3754598.3754649"},"language":null,"primary_location":{"id":"doi:10.1145/3754598.3754649","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3754598.3754649","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 54th International Conference on Parallel Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058735963","display_name":"M. Irigaray Costa","orcid":null},"institutions":[{"id":"https://openalex.org/I130442723","display_name":"Universidade Federal do Rio Grande do Sul","ror":"https://ror.org/041yk2d64","country_code":"BR","type":"education","lineage":["https://openalex.org/I130442723"]}],"countries":["BR"],"is_corresponding":true,"raw_author_name":"Matheus Costa","raw_affiliation_strings":["Federal University of Rio Grande do Sul, Porto Alegre, Brazil"],"affiliations":[{"raw_affiliation_string":"Federal University of Rio Grande do Sul, Porto Alegre, Brazil","institution_ids":["https://openalex.org/I130442723"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045424215","display_name":"P. Navaux","orcid":null},"institutions":[{"id":"https://openalex.org/I130442723","display_name":"Universidade Federal do Rio Grande do Sul","ror":"https://ror.org/041yk2d64","country_code":"BR","type":"education","lineage":["https://openalex.org/I130442723"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Philippe Navaux","raw_affiliation_strings":["Federal University of Rio Grande do Sul, Porto Alegre, Brazil"],"affiliations":[{"raw_affiliation_string":"Federal University of Rio Grande do Sul, Porto Alegre, Brazil","institution_ids":["https://openalex.org/I130442723"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120921269","display_name":"Silvio Rizzi","orcid":null},"institutions":[{"id":"https://openalex.org/I1282105669","display_name":"Argonne National Laboratory","ror":"https://ror.org/05gvnxz63","country_code":"US","type":"facility","lineage":["https://openalex.org/I1282105669","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Silvio Rizzi","raw_affiliation_strings":["Argonne National Laboratory, Lemont, Illinois, USA"],"affiliations":[{"raw_affiliation_string":"Argonne National Laboratory, Lemont, Illinois, USA","institution_ids":["https://openalex.org/I1282105669"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056127793","display_name":"Arthur F. Lorenzon","orcid":"https://orcid.org/0000-0002-2412-3027"},"institutions":[{"id":"https://openalex.org/I130442723","display_name":"Universidade Federal do Rio Grande do Sul","ror":"https://ror.org/041yk2d64","country_code":"BR","type":"education","lineage":["https://openalex.org/I130442723"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Arthur Lorenzon","raw_affiliation_strings":["Federal University of Rio Grande do Sul, Porto Alegre, Brazil"],"affiliations":[{"raw_affiliation_string":"Federal University of Rio Grande do Sul, Porto Alegre, Brazil","institution_ids":["https://openalex.org/I130442723"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5058735963"],"corresponding_institution_ids":["https://openalex.org/I130442723"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.67331827,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"597","last_page":"606"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.4002000093460083,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.4002000093460083,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.20520000159740448,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.14159999787807465,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.7572000026702881},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.7150999903678894},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.5483999848365784},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.5127999782562256},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5049999952316284},{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.4821999967098236},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.4603999853134155},{"id":"https://openalex.org/keywords/energy-consumption","display_name":"Energy consumption","score":0.4422000050544739}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8407999873161316},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.7572000026702881},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.7150999903678894},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.6119999885559082},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.5483999848365784},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.5127999782562256},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5049999952316284},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.4821999967098236},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4603999853134155},{"id":"https://openalex.org/C2780165032","wikidata":"https://www.wikidata.org/wiki/Q16869822","display_name":"Energy consumption","level":2,"score":0.4422000050544739},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.4334000051021576},{"id":"https://openalex.org/C172367668","wikidata":"https://www.wikidata.org/wiki/Q6504956","display_name":"Data visualization","level":3,"score":0.4235999882221222},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4036000072956085},{"id":"https://openalex.org/C62611344","wikidata":"https://www.wikidata.org/wiki/Q1062658","display_name":"Node (physics)","level":2,"score":0.36250001192092896},{"id":"https://openalex.org/C2779965156","wikidata":"https://www.wikidata.org/wiki/Q5227350","display_name":"Data sharing","level":3,"score":0.3409000039100647},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.30469998717308044},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.3005000054836273},{"id":"https://openalex.org/C2778837361","wikidata":"https://www.wikidata.org/wiki/Q2450880","display_name":"Exascale computing","level":3,"score":0.2994000017642975},{"id":"https://openalex.org/C51332947","wikidata":"https://www.wikidata.org/wiki/Q1172305","display_name":"Shared resource","level":2,"score":0.29589998722076416},{"id":"https://openalex.org/C2780378061","wikidata":"https://www.wikidata.org/wiki/Q25351891","display_name":"Service (business)","level":2,"score":0.2955000102519989},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.2892000079154968},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.2718000113964081},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.25850000977516174},{"id":"https://openalex.org/C47487241","wikidata":"https://www.wikidata.org/wiki/Q5227230","display_name":"Data access","level":2,"score":0.2581999897956848}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3754598.3754649","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3754598.3754649","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 54th International Conference on Parallel Processing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7836157083511353,"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1963503066","https://openalex.org/W2021361430","https://openalex.org/W2158664119","https://openalex.org/W2346404725","https://openalex.org/W2732547479","https://openalex.org/W2944412207","https://openalex.org/W2948524867","https://openalex.org/W2971662856","https://openalex.org/W3022298203","https://openalex.org/W3156426083","https://openalex.org/W3207160066","https://openalex.org/W4200612214","https://openalex.org/W4225148432","https://openalex.org/W4285184890","https://openalex.org/W4285504024","https://openalex.org/W4293079010","https://openalex.org/W4304192541","https://openalex.org/W4313433637","https://openalex.org/W4321636677","https://openalex.org/W4324044681","https://openalex.org/W4387951242","https://openalex.org/W4391053367","https://openalex.org/W4399296903","https://openalex.org/W4401389834","https://openalex.org/W4401408761","https://openalex.org/W4405756372","https://openalex.org/W4409133382"],"related_works":[],"abstract_inverted_index":{"In-transit":[0],"visualization":[1,27,82,129],"has":[2],"become":[3],"essential":[4,178],"in":[5,22,44,80],"high-performance":[6],"computing":[7],"(HPC)":[8],"to":[9,58,76,168,179,195,207,226],"reduce":[10,219],"I/O":[11],"overheads":[12],"and":[13,24,37,62,114,127,143,157,190],"enable":[14],"real-time":[15],"data":[16,144],"analysis.":[17],"However,":[18],"as":[19],"simulations":[20],"grow":[21],"scale":[23],"complexity,":[25],"these":[26],"tasks":[28],"increasingly":[29],"demand":[30],"substantial":[31],"computational":[32],"resources,":[33],"exacerbating":[34],"energy":[35,78,197],"consumption":[36],"limiting":[38],"system":[39,229],"scalability.":[40],"As":[41],"we":[42,71,212],"show":[43,147,172],"this":[45,69],"paper,":[46],"a":[47],"key":[48],"bottleneck":[49],"is":[50,177],"the":[51,97,123,134,153,161,220],"conventional":[52],"one-rank-per-GPU":[53],"allocation":[54],"model,":[55],"which":[56,109,118],"leads":[57],"irregular":[59],"GPU":[60,116],"utilization":[61],"waste":[63],"of":[64],"hardware":[65],"resources.":[66],"To":[67],"tackle":[68],"challenge,":[70],"propose":[72],"using":[73],"GPU-sharing":[74,95,149],"strategies":[75,150,217],"improve":[77,152,180],"efficiency":[79],"in-transit":[81],"without":[83],"compromising":[84],"performance.":[85],"We":[86,170],"evaluate":[87],"six":[88],"distinct":[89],"configurations":[90,184],"built":[91],"upon":[92],"three":[93],"NVIDIA":[94],"mechanisms:":[96],"default":[98],"CUDA":[99],"model":[100],"with":[101],"context":[102,112],"switching":[103],"between":[104,155],"processes,":[105],"Multi-Process":[106],"Service":[107],"(MPS),":[108],"enables":[110],"dynamic":[111],"sharing,":[113],"Multi-Instance":[115],"(MIG),":[117],"provides":[119],"hardware-level":[120],"partitioning.":[121],"Using":[122],"WarpX":[124],"simulation":[125],"code":[126],"Ascent":[128],"framework,":[130],"our":[131],"experiments":[132],"on":[133],"Polaris":[135],"supercomputer":[136],"span":[137],"multiple":[138],"rendering":[139],"techniques,":[140],"node":[141],"counts,":[142],"sizes.":[145],"Results":[146],"that":[148,173,214],"can":[151,218],"trade-off":[154],"performance":[156],"energy,":[158],"represented":[159],"by":[160,166,224],"energy-delay":[162],"product":[163],"(EDP)":[164],"metric,":[165],"up":[167,194,206,225],"81.7%.":[169],"also":[171],"workload-aware":[174],"strategy":[175],"selection":[176],"performance-energy":[181],"efficiency:":[182],"MIG-based":[183],"are":[185],"more":[186],"effective":[187],"for":[188,231],"lightweight":[189],"regular":[191],"workloads,":[192,204],"offering":[193],"64.5%":[196],"savings,":[198],"while":[199],"MPS":[200],"better":[201],"handles":[202],"GPU-intensive":[203],"achieving":[205],"71.1%":[208],"EDP":[209],"improvement.":[210],"Finally,":[211],"demonstrate":[213],"optimized":[215],"sharing":[216],"required":[221],"compute":[222],"nodes":[223],"75%,":[227],"freeing":[228],"resources":[230],"concurrent":[232],"workloads.":[233]},"counts_by_year":[],"updated_date":"2025-12-21T02:06:08.432651","created_date":"2025-12-21T00:00:00"}
