{"id":"https://openalex.org/W4225275477","doi":"https://doi.org/10.1145/3529538.3529993","title":"Performance analysis of matrix-free conjugate gradient kernels using SYCL","display_name":"Performance analysis of matrix-free conjugate gradient kernels using SYCL","publication_year":2022,"publication_date":"2022-05-02","ids":{"openalex":"https://openalex.org/W4225275477","doi":"https://doi.org/10.1145/3529538.3529993"},"language":"en","primary_location":{"id":"doi:10.1145/3529538.3529993","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3529538.3529993","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3529538.3529993","source":{"id":"https://openalex.org/S4306420323","display_name":"International Workshop on OpenCL","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Workshop on OpenCL","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3529538.3529993","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5044415950","display_name":"Igor A. Baratta","orcid":"https://orcid.org/0000-0003-4298-2973"},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Igor Baratta","raw_affiliation_strings":["Department of Engineering, University of Cambridge, United Kingdom"],"affiliations":[{"raw_affiliation_string":"Department of Engineering, University of Cambridge, United Kingdom","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035495121","display_name":"Chris Richardson","orcid":"https://orcid.org/0000-0003-3137-1392"},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Chris Richardson","raw_affiliation_strings":["BP Institute, University of Cambridge, United Kingdom"],"affiliations":[{"raw_affiliation_string":"BP Institute, University of Cambridge, United Kingdom","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5030408094","display_name":"Garth N. Wells","orcid":"https://orcid.org/0000-0001-5291-7951"},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Garth Wells","raw_affiliation_strings":["Department of Engineering, University of Cambridge, United Kingdom"],"affiliations":[{"raw_affiliation_string":"Department of Engineering, University of Cambridge, United Kingdom","institution_ids":["https://openalex.org/I241749"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5044415950"],"corresponding_institution_ids":["https://openalex.org/I241749"],"apc_list":null,"apc_paid":null,"fwci":1.0427,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.71102041,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10792","display_name":"Matrix Theory and Algorithms","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10792","display_name":"Matrix Theory and Algorithms","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9891999959945679,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9879000186920166,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7075868248939514},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6664111018180847},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.6534621119499207},{"id":"https://openalex.org/keywords/conjugate-gradient-method","display_name":"Conjugate gradient method","score":0.5756122469902039},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.5726723670959473},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.4353901445865631},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.33113962411880493},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2518021762371063}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7075868248939514},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6664111018180847},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.6534621119499207},{"id":"https://openalex.org/C81184566","wikidata":"https://www.wikidata.org/wiki/Q1191895","display_name":"Conjugate gradient method","level":2,"score":0.5756122469902039},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.5726723670959473},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.4353901445865631},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.33113962411880493},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2518021762371063},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1145/3529538.3529993","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3529538.3529993","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3529538.3529993","source":{"id":"https://openalex.org/S4306420323","display_name":"International Workshop on OpenCL","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Workshop on OpenCL","raw_type":"proceedings-article"},{"id":"pmh:doi:10.17863/cam.83190","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Conference Object"},{"id":"pmh:oai:www.repository.cam.ac.uk:1810/335753","is_oa":false,"landing_page_url":"https://www.repository.cam.ac.uk/handle/1810/335753","pdf_url":null,"source":{"id":"https://openalex.org/S4306401777","display_name":"Apollo (University of Cambridge)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I241749","host_organization_name":"University of Cambridge","host_organization_lineage":["https://openalex.org/I241749"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Conference Object"},{"id":"doi:10.17863/cam.83190","is_oa":true,"landing_page_url":"https://doi.org/10.17863/cam.83190","pdf_url":null,"source":{"id":"https://openalex.org/S7407050737","display_name":"Apollo","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.1145/3529538.3529993","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3529538.3529993","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3529538.3529993","source":{"id":"https://openalex.org/S4306420323","display_name":"International Workshop on OpenCL","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Workshop on OpenCL","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7","score":0.4699999988079071}],"awards":[{"id":"https://openalex.org/G1934935867","display_name":null,"funder_award_id":"Engineering and Physical Sciences R","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G2465893628","display_name":"Peta-5: A National Facility for Petascale Data Intensive Computation and Analytics","funder_award_id":"EP/P020259/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G289141369","display_name":null,"funder_award_id":"EP/S005072","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G3246325849","display_name":null,"funder_award_id":"EP/S005072/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G3899621908","display_name":"Strategic Partnership in Computational Science for Advanced Simulation and Modelling of Engineering Systems - ASiMoV","funder_award_id":"EP/S005072/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G5258663329","display_name":null,"funder_award_id":"EP/P020259/","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G5368029276","display_name":null,"funder_award_id":"EP/P020259/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G6661763476","display_name":null,"funder_award_id":"EP/P020259/1","funder_id":"https://openalex.org/F4320320273","funder_display_name":"University of Cambridge"},{"id":"https://openalex.org/G7517786979","display_name":null,"funder_award_id":"EP/N018877/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G7729622549","display_name":null,"funder_award_id":"EP/P020259/1)","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G8719353587","display_name":null,"funder_award_id":"EP/P0","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320312927","display_name":"Dell EMC","ror":"https://ror.org/05rejmm18"},{"id":"https://openalex.org/F4320320273","display_name":"University of Cambridge","ror":"https://ror.org/013meh722"},{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"},{"id":"https://openalex.org/F4320334632","display_name":"Science and Technology Facilities Council","ror":"https://ror.org/057g20z61"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4225275477.pdf","grobid_xml":"https://content.openalex.org/works/W4225275477.grobid-xml"},"referenced_works_count":10,"referenced_works":["https://openalex.org/W1575701986","https://openalex.org/W2059300917","https://openalex.org/W2180959024","https://openalex.org/W2963610942","https://openalex.org/W3016268593","https://openalex.org/W3017538311","https://openalex.org/W3017954454","https://openalex.org/W3144168681","https://openalex.org/W4237468342","https://openalex.org/W4243474409"],"related_works":["https://openalex.org/W3062287","https://openalex.org/W2380390332","https://openalex.org/W2742145873","https://openalex.org/W2023572661","https://openalex.org/W4245975140","https://openalex.org/W3189307731","https://openalex.org/W2949962288","https://openalex.org/W2364686214","https://openalex.org/W1428699136","https://openalex.org/W1998560227"],"abstract_inverted_index":{"We":[0],"examine":[1],"the":[2,9,49,56,63,79,84,93,98,102,126,133,157,165,169],"performance":[3,73,100,178],"of":[4,8,18,101,148,164,188],"matrix-free":[5,64],"SYCL":[6,39,95,127,153,166],"implementations":[7,47,54,96,167],"conjugate":[10],"gradient":[11],"method":[12],"for":[13,106,162],"solving":[14],"sparse":[15],"linear":[16],"systems":[17],"equations.":[19],"Performance":[20],"is":[21,75,114,129,143],"tested":[22],"on":[23,48,55,121,156,168],"an":[24],"NVIDIA":[25],"A100-80GB":[26],"device":[27],"and":[28,41,52,70,83,159,174,192],"a":[29,71,118],"dual":[30],"socket":[31],"Intel":[32],"Ice":[33],"Lake":[34],"CPU":[35,57,170],"node":[36],"using":[37],"different":[38],"implementations,":[40],"compared":[42],"to":[43,77,116,132],"CUDA":[44],"BLAS":[45],"(cuBLAS)":[46],"A100":[50],"GPU":[51,158],"MKL":[53],"node.":[58],"All":[59],"considered":[60,94],"kernels":[61],"in":[62,90,139,184],"implementation":[65,136],"are":[66],"memory":[67,81],"bandwidth":[68,82],"limited,":[69],"simple":[72],"model":[74],"applied":[76],"estimate":[78],"asymptotic":[80,99],"latency.":[85],"Our":[86],"experiments":[87],"show":[88],"that":[89,182],"most":[91],"cases":[92,125,141,185],"match":[97],"reference":[103,134],"implementations.":[104],"However,":[105],"smaller":[107],"but":[108,138],"practically":[109],"relevant":[110],"problem":[111,180],"sizes":[112,181],"latency":[113,128,176],"observed":[115],"have":[117],"significant":[119],"impact":[120],"performance.":[122,197],"For":[123],"some":[124],"reasonably":[130],"close":[131],"(cuBLAS/MKL)":[135],"latency,":[137,173],"other":[140],"it":[142],"more":[144],"than":[145],"one":[146,163],"order":[147],"magnitude":[149],"greater.":[150],"In":[151],"particular,":[152],"built-in":[154],"reductions":[155],"all":[160],"operations":[161],"exhibit":[171],"high":[172],"this":[175],"limits":[177],"at":[179],"can":[183,193],"be":[186],"representative":[187],"full":[189],"application":[190],"simulations,":[191],"degrade":[194],"strong":[195],"scaling":[196]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
