{"id":"https://openalex.org/W4386709692","doi":"https://doi.org/10.1145/3605573.3605604","title":"Communication-Avoiding Optimizations for Large-Scale Unstructured-Mesh Applications with OP2","display_name":"Communication-Avoiding Optimizations for Large-Scale Unstructured-Mesh Applications with OP2","publication_year":2023,"publication_date":"2023-08-07","ids":{"openalex":"https://openalex.org/W4386709692","doi":"https://doi.org/10.1145/3605573.3605604"},"language":"en","primary_location":{"id":"doi:10.1145/3605573.3605604","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3605573.3605604","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3605573.3605604","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 52nd International Conference on Parallel Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3605573.3605604","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5092863053","display_name":"Suneth Dasantha Ekanayake","orcid":"https://orcid.org/0009-0005-7180-545X"},"institutions":[{"id":"https://openalex.org/I39555362","display_name":"University of Warwick","ror":"https://ror.org/01a77tt86","country_code":"GB","type":"education","lineage":["https://openalex.org/I39555362"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Suneth Dasantha Ekanayake","raw_affiliation_strings":["Computer Science, University of Warwick, United Kingdom"],"affiliations":[{"raw_affiliation_string":"Computer Science, University of Warwick, United Kingdom","institution_ids":["https://openalex.org/I39555362"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048478302","display_name":"Istv\u00e1n Z. Reguly","orcid":"https://orcid.org/0000-0002-4385-4204"},"institutions":[{"id":"https://openalex.org/I31882830","display_name":"P\u00e1zm\u00e1ny P\u00e9ter Catholic University","ror":"https://ror.org/05v9kya57","country_code":"HU","type":"education","lineage":["https://openalex.org/I31882830"]}],"countries":["HU"],"is_corresponding":false,"raw_author_name":"Istv\u00e1n Zolt\u00e1n Reguly","raw_affiliation_strings":["Information Technology and Bionics, P\u00e1zm\u00e1ny P\u00e9ter Catholic University, Hungary"],"affiliations":[{"raw_affiliation_string":"Information Technology and Bionics, P\u00e1zm\u00e1ny P\u00e9ter Catholic University, Hungary","institution_ids":["https://openalex.org/I31882830"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052710314","display_name":"Fabio Luporini","orcid":"https://orcid.org/0000-0001-7161-2942"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fabio Luporini","raw_affiliation_strings":["Devito Codes, United Kingdom"],"affiliations":[{"raw_affiliation_string":"Devito Codes, United Kingdom","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5070789282","display_name":"Gihan R. Mudalige","orcid":"https://orcid.org/0000-0002-1398-5174"},"institutions":[{"id":"https://openalex.org/I39555362","display_name":"University of Warwick","ror":"https://ror.org/01a77tt86","country_code":"GB","type":"education","lineage":["https://openalex.org/I39555362"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Gihan Ravideva Mudalige","raw_affiliation_strings":["Computer Science, University of Warwick, United Kingdom"],"affiliations":[{"raw_affiliation_string":"Computer Science, University of Warwick, United Kingdom","institution_ids":["https://openalex.org/I39555362"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5092863053"],"corresponding_institution_ids":["https://openalex.org/I39555362"],"apc_list":null,"apc_paid":null,"fwci":0.9005,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.71541743,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"380","last_page":"391"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8794318437576294},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6812467575073242},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.6777340173721313},{"id":"https://openalex.org/keywords/message-passing","display_name":"Message passing","score":0.5121452808380127},{"id":"https://openalex.org/keywords/gpu-cluster","display_name":"GPU cluster","score":0.5084675550460815},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.49115654826164246},{"id":"https://openalex.org/keywords/loop","display_name":"Loop (graph theory)","score":0.43303316831588745},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.3420169949531555},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.1905101239681244}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8794318437576294},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6812467575073242},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.6777340173721313},{"id":"https://openalex.org/C854659","wikidata":"https://www.wikidata.org/wiki/Q1859284","display_name":"Message passing","level":2,"score":0.5121452808380127},{"id":"https://openalex.org/C2781335571","wikidata":"https://www.wikidata.org/wiki/Q2633544","display_name":"GPU cluster","level":3,"score":0.5084675550460815},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.49115654826164246},{"id":"https://openalex.org/C184670325","wikidata":"https://www.wikidata.org/wiki/Q512604","display_name":"Loop (graph theory)","level":2,"score":0.43303316831588745},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.3420169949531555},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.1905101239681244},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3605573.3605604","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3605573.3605604","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3605573.3605604","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 52nd International Conference on Parallel Processing","raw_type":"proceedings-article"},{"id":"pmh:oai:wrap.warwick.ac.uk:185102","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306400665","display_name":"Warwick Research Archive Portal (University of Warwick)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I39555362","host_organization_name":"University of Warwick","host_organization_lineage":["https://openalex.org/I39555362"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"","raw_type":"Thesis"},{"id":"pmh:oai:wrap.warwick.ac.uk:176799","is_oa":true,"landing_page_url":"https://wrap.warwick.ac.uk/176799/13/WRAP-Communication-avoiding-optimizations-large-scale-unstructured-mesh-OP2-23.pdf","pdf_url":"https://wrap.warwick.ac.uk/176799/19/WRAP-communication-avoiding-optimizations-large-scale-unstructured-mesh-applications-OP2-Ekanayake-2023.pdf","source":{"id":"https://openalex.org/S4306400665","display_name":"Warwick Research Archive Portal (University of Warwick)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I39555362","host_organization_name":"University of Warwick","host_organization_lineage":["https://openalex.org/I39555362"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference Item"}],"best_oa_location":{"id":"doi:10.1145/3605573.3605604","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3605573.3605604","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3605573.3605604","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 52nd International Conference on Parallel Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2153253078","display_name":null,"funder_award_id":"EP/S005072/1, EP/P020267/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G289141369","display_name":null,"funder_award_id":"EP/S005072","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G3246325849","display_name":null,"funder_award_id":"EP/S005072/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G3458110196","display_name":"EPCC Tier 2 HPC Service","funder_award_id":"EP/P020267/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G3899621908","display_name":"Strategic Partnership in Computational Science for Advanced Simulation and Modelling of Engineering Systems - ASiMoV","funder_award_id":"EP/S005072/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G4398685361","display_name":null,"funder_award_id":"EP/P020267/1","funder_id":"https://openalex.org/F4320320332","funder_display_name":"University of Edinburgh"},{"id":"https://openalex.org/G8719353587","display_name":null,"funder_award_id":"EP/P0","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G8988075325","display_name":null,"funder_award_id":"EP/S005072/1","funder_id":"https://openalex.org/F4320320006","funder_display_name":"Royal Society"}],"funders":[{"id":"https://openalex.org/F4320320006","display_name":"Royal Society","ror":"https://ror.org/03wnrjx87"},{"id":"https://openalex.org/F4320320288","display_name":"Rolls-Royce","ror":"https://ror.org/04h08p482"},{"id":"https://openalex.org/F4320320332","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90"},{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4386709692.pdf","grobid_xml":"https://content.openalex.org/works/W4386709692.grobid-xml"},"referenced_works_count":24,"referenced_works":["https://openalex.org/W1153030042","https://openalex.org/W1506135481","https://openalex.org/W1901281515","https://openalex.org/W1979457157","https://openalex.org/W1982811641","https://openalex.org/W1987210269","https://openalex.org/W2032032242","https://openalex.org/W2034761517","https://openalex.org/W2063249715","https://openalex.org/W2064735574","https://openalex.org/W2076517649","https://openalex.org/W2107979247","https://openalex.org/W2608177516","https://openalex.org/W2780077279","https://openalex.org/W2792217880","https://openalex.org/W2960120665","https://openalex.org/W2963179825","https://openalex.org/W3020936970","https://openalex.org/W3105956580","https://openalex.org/W3151489216","https://openalex.org/W3154594620","https://openalex.org/W4247520494","https://openalex.org/W4254778071","https://openalex.org/W4313023190"],"related_works":["https://openalex.org/W2056717482","https://openalex.org/W3189307731","https://openalex.org/W2949962288","https://openalex.org/W2364686214","https://openalex.org/W1428699136","https://openalex.org/W1998560227","https://openalex.org/W2017587301","https://openalex.org/W2030707850","https://openalex.org/W2170611190","https://openalex.org/W2566934642"],"abstract_inverted_index":{"In":[0],"this":[1,223],"paper,":[2],"we":[3,32],"investigate":[4],"data":[5,58],"movement-reducing":[6],"and":[7,10,135,143,182,190,194,216],"communication-avoiding":[8,67],"optimizations":[9,221],"their":[11],"practicable":[12],"implementation":[13],"for":[14,26,36,62,70,151,165,171,222],"large-scale":[15],"unstructured-mesh":[16,28,199],"applications.":[17,226],"Utilizing":[18],"the":[19,23,27,120,154,166,172,192,213,220],"high-level":[20],"abstraction":[21],"of":[22,30,43,57,97,123,141,196,218,225],"OP2":[24,71,86,121],"DSL":[25],"class":[29,224],"codes,":[31],"reason":[33],"about":[34],"techniques":[35,76],"reduced":[37],"communications":[38],"across":[39],"a":[40,46,95,197],"consecutive":[41],"sequence":[42],"loops":[44],"\u2013":[45,161],"loop-chain.":[47],"The":[48,88,108],"careful":[49],"trade-off":[50],"with":[51,102,106,207],"increased":[52],"redundant":[53],"computation":[54],"in":[55,104,168],"place":[56],"movement":[59],"is":[60,72,90,112,130],"analyzed":[61],"distributed-memory":[63],"parallelization.":[64],"A":[65],"new":[66,109,155],"(CA)":[68],"back-end":[69,89,111,157],"designed,":[73],"codifying":[74],"these":[75,169],"such":[77],"that":[78,201],"they":[79],"can":[80,202],"be":[81],"applied":[82,113],"automatically":[83,114],"to":[84,92,115,204],"any":[85],"application.":[87],"extended":[91],"operate":[93],"on":[94,132,138,175],"cluster":[96],"GPUs,":[98],"integrating":[99],"GPU-to-GPU":[100],"communication":[101],"CUDA,":[103],"combination":[105],"MPI.":[107],"CA":[110,156,208],"two":[116],"non-trivial":[117],"applications,":[118],"including":[119],"version":[122],"Rolls-Royce\u2019s":[124],"production":[125],"CFD":[126],"application,":[127],"Hydra.":[128],"Performance":[129],"investigated":[131],"both":[133,176],"CPU":[134],"GPU":[136,186],"clusters":[137],"representative":[139],"problems":[140],"8M":[142],"24M":[144],"node":[145],"mesh":[146,173],"sizes.":[147],"Results":[148],"demonstrate":[149],"how":[150],"select":[152],"configurations":[153],"provides":[158],"between":[159],"30":[160],"65%":[162],"runtime":[163],"reductions":[164],"loop-chains":[167],"applications":[170],"sizes":[174],"an":[177,183],"HPE":[178],"Cray":[179],"EX":[180],"system":[181],"NVIDIA":[184],"V100":[185],"cluster.":[187],"We":[188],"model":[189],"examine":[191],"determinants":[193],"characteristics":[195],"given":[198],"loop-chain":[200],"lead":[203],"performance":[205],"benefits":[206],"techniques,":[209],"providing":[210],"insights":[211],"into":[212],"general":[214],"feasibility":[215],"profitability":[217],"using":[219]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
