{"id":"https://openalex.org/W3117225375","doi":"https://doi.org/10.1109/sc.2016.67","title":"Accelerating Lattice QCD Multigrid on GPUs Using Fine-Grained Parallelization","display_name":"Accelerating Lattice QCD Multigrid on GPUs Using Fine-Grained Parallelization","publication_year":2016,"publication_date":"2016-11-01","ids":{"openalex":"https://openalex.org/W3117225375","doi":"https://doi.org/10.1109/sc.2016.67","mag":"3117225375"},"language":"en","primary_location":{"id":"doi:10.1109/sc.2016.67","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sc.2016.67","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"SC16: International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://www.osti.gov/biblio/1413692","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5046199609","display_name":"M. A. Clark","orcid":"https://orcid.org/0000-0001-5211-2002"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"M. A. Clark","raw_affiliation_strings":["NVIDIA Corporation, Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"NVIDIA Corporation, Santa Clara, CA, USA","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Balint Joo","orcid":null},"institutions":[{"id":"https://openalex.org/I29801172","display_name":"Thomas Jefferson National Accelerator Facility","ror":"https://ror.org/02vwzrd76","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I29801172","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Balint Joo","raw_affiliation_strings":["Thomas Jefferson National Accelerator Facility, Newport News, VA, USA"],"affiliations":[{"raw_affiliation_string":"Thomas Jefferson National Accelerator Facility, Newport News, VA, USA","institution_ids":["https://openalex.org/I29801172"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Alexei Strelchenko","orcid":null},"institutions":[{"id":"https://openalex.org/I1314696892","display_name":"Fermi National Accelerator Laboratory","ror":"https://ror.org/020hgte69","country_code":"US","type":"facility","lineage":["https://openalex.org/I1314696892","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210114836"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alexei Strelchenko","raw_affiliation_strings":["Fermi National Accelerator Laboratory, Batavia, IL, USA"],"affiliations":[{"raw_affiliation_string":"Fermi National Accelerator Laboratory, Batavia, IL, USA","institution_ids":["https://openalex.org/I1314696892"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Michael Cheng","orcid":null},"institutions":[{"id":"https://openalex.org/I4388482696","display_name":"Naval Research Laboratory Information Technology Division","ror":"https://ror.org/04xfp8b22","country_code":null,"type":"facility","lineage":["https://openalex.org/I1288214837","https://openalex.org/I1330347796","https://openalex.org/I175003984","https://openalex.org/I3130687028","https://openalex.org/I4388482696"]},{"id":"https://openalex.org/I111088046","display_name":"Boston University","ror":"https://ror.org/05qwgg493","country_code":"US","type":"education","lineage":["https://openalex.org/I111088046"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michael Cheng","raw_affiliation_strings":["Center for Computational Science, Boston University, Boston, MA, USA"],"affiliations":[{"raw_affiliation_string":"Center for Computational Science, Boston University, Boston, MA, USA","institution_ids":["https://openalex.org/I111088046","https://openalex.org/I4388482696"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Arjun Gambhir","orcid":null},"institutions":[{"id":"https://openalex.org/I16285277","display_name":"William & Mary","ror":"https://ror.org/03hsf0573","country_code":"US","type":"education","lineage":["https://openalex.org/I16285277"]},{"id":"https://openalex.org/I267592682","display_name":"Williams (United States)","ror":"https://ror.org/007zhvp17","country_code":"US","type":"company","lineage":["https://openalex.org/I267592682"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Arjun Gambhir","raw_affiliation_strings":["The College of William and Mary, Williamsburg, VA, USA"],"affiliations":[{"raw_affiliation_string":"The College of William and Mary, Williamsburg, VA, USA","institution_ids":["https://openalex.org/I16285277","https://openalex.org/I267592682"]}]},{"author_position":"last","author":{"id":null,"display_name":"Richard. C. Brower","orcid":null},"institutions":[{"id":"https://openalex.org/I111088046","display_name":"Boston University","ror":"https://ror.org/05qwgg493","country_code":"US","type":"education","lineage":["https://openalex.org/I111088046"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Richard. C. Brower","raw_affiliation_strings":["Physics Department, Boston University, Boston, MA, USA"],"affiliations":[{"raw_affiliation_string":"Physics Department, Boston University, Boston, MA, USA","institution_ids":["https://openalex.org/I111088046"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5046199609"],"corresponding_institution_ids":["https://openalex.org/I4210127875"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.35408958,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"795","last_page":"806"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9926000237464905,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9926000237464905,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9801999926567078,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10048","display_name":"Particle physics theoretical and experimental studies","score":0.9728999733924866,"subfield":{"id":"https://openalex.org/subfields/3106","display_name":"Nuclear and High Energy Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/multigrid-method","display_name":"Multigrid method","score":0.6925241947174072},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6428943872451782},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5424192547798157},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.4486503601074219},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.26395183801651},{"id":"https://openalex.org/keywords/partial-differential-equation","display_name":"Partial differential equation","score":0.08064639568328857}],"concepts":[{"id":"https://openalex.org/C137119250","wikidata":"https://www.wikidata.org/wiki/Q1413101","display_name":"Multigrid method","level":3,"score":0.6925241947174072},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6428943872451782},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5424192547798157},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.4486503601074219},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.26395183801651},{"id":"https://openalex.org/C93779851","wikidata":"https://www.wikidata.org/wiki/Q271977","display_name":"Partial differential equation","level":2,"score":0.08064639568328857},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/sc.2016.67","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sc.2016.67","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"SC16: International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},{"id":"pmh:oai:osti.gov:1413692","is_oa":true,"landing_page_url":"https://www.osti.gov/biblio/1413692","pdf_url":null,"source":{"id":"https://openalex.org/S4306402487","display_name":"OSTI OAI (U.S. Department of Energy Office of Scientific and Technical Information)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I139351228","host_organization_name":"Office of Scientific and Technical Information","host_organization_lineage":["https://openalex.org/I139351228"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""},{"id":"pmh:oai:osti.gov:1984942","is_oa":true,"landing_page_url":"https://www.osti.gov/biblio/1984942","pdf_url":null,"source":{"id":"https://openalex.org/S4306402487","display_name":"OSTI OAI (U.S. Department of Energy Office of Scientific and Technical Information)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I139351228","host_organization_name":"Office of Scientific and Technical Information","host_organization_lineage":["https://openalex.org/I139351228"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""},{"id":"mag:3117225375","is_oa":false,"landing_page_url":"http://ui.adsabs.harvard.edu/abs/2016slft.confE.267C/abstract","pdf_url":null,"source":{"id":"https://openalex.org/S4306523789","display_name":"Proceedings of the 34th annual International Symposium on Lattice Field Theory (LATTICE2016). 24-30 July 2016. University of Southampton","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":"Proceedings of the 34th annual International Symposium on Lattice Field Theory (LATTICE2016). 24-30 July 2016. University of Southampton","raw_type":null}],"best_oa_location":{"id":"pmh:oai:osti.gov:1413692","is_oa":true,"landing_page_url":"https://www.osti.gov/biblio/1413692","pdf_url":null,"source":{"id":"https://openalex.org/S4306402487","display_name":"OSTI OAI (U.S. Department of Energy Office of Scientific and Technical Information)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I139351228","host_organization_name":"Office of Scientific and Technical Information","host_organization_lineage":["https://openalex.org/I139351228"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""},"sustainable_development_goals":[{"score":0.5899999737739563,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W1967983557","https://openalex.org/W1982115588","https://openalex.org/W1984450486","https://openalex.org/W1987779630","https://openalex.org/W1994276589","https://openalex.org/W2000315635","https://openalex.org/W2011467018","https://openalex.org/W2035476608","https://openalex.org/W2045315360","https://openalex.org/W2055404352","https://openalex.org/W2057309399","https://openalex.org/W2087386549","https://openalex.org/W2090058354","https://openalex.org/W2094072432","https://openalex.org/W2099034447","https://openalex.org/W2113190809","https://openalex.org/W2118001135","https://openalex.org/W2126271681","https://openalex.org/W2155216327","https://openalex.org/W2155967869","https://openalex.org/W2295191825","https://openalex.org/W2316564661","https://openalex.org/W2593840094","https://openalex.org/W2963215039","https://openalex.org/W4250530934","https://openalex.org/W6637234551","https://openalex.org/W6687325977","https://openalex.org/W6688150730","https://openalex.org/W6690045743","https://openalex.org/W7008672335"],"related_works":["https://openalex.org/W2518179864","https://openalex.org/W2963563828","https://openalex.org/W2068528924","https://openalex.org/W2962998979","https://openalex.org/W3211968776","https://openalex.org/W3197222827","https://openalex.org/W2044626942","https://openalex.org/W3158780744","https://openalex.org/W2153149286","https://openalex.org/W1970906114","https://openalex.org/W2141579716","https://openalex.org/W2368802187","https://openalex.org/W1971418186","https://openalex.org/W2909831742","https://openalex.org/W2099916305","https://openalex.org/W41872799","https://openalex.org/W2591801545","https://openalex.org/W3187956650","https://openalex.org/W2029285237","https://openalex.org/W2040921949"],"abstract_inverted_index":{"The":[0],"past":[1],"decade":[2],"has":[3,19],"witnessed":[4],"a":[5],"dramatic":[6],"acceleration":[7],"of":[8,56,82,94,110,144],"lattice":[9],"quantum":[10],"chromodynamics":[11],"calculations":[12],"in":[13,26],"nuclear":[14],"and":[15,35,62,90,139],"particle":[16],"physics.":[17],"This":[18],"been":[20],"due":[21,36],"to":[22,37,53,97,123,136],"both":[23],"significant":[24],"progress":[25],"accelerating":[27],"the":[28,38,54,59,72,85,98,108,116,137,141],"iterative":[29],"linear":[30],"solvers":[31],"using":[32],"multigrid":[33],"algorithms,":[34],"throughput":[39],"improvements":[40],"brought":[41],"by":[42,78],"GPUs.":[43],"Deploying":[44],"hierarchical":[45],"algorithms":[46],"optimally":[47],"on":[48,58,131],"GPUs":[49],"is":[50],"non-trivial":[51],"owing":[52],"lack":[55],"parallelism":[57,83,96],"coarse":[60],"grids,":[61],"as":[63],"such,":[64],"these":[65],"advances":[66],"have":[67],"not":[68],"proved":[69],"multiplicative.":[70],"Using":[71],"QUDA":[73],"library,":[74],"we":[75,101,120,134],"demonstrate":[76,121],"that":[77,84],"exposing":[79],"all":[80],"sources":[81],"underlying":[86],"stencil":[87],"problem":[88],"possesses,":[89],"through":[91],"appropriate":[92],"mapping":[93],"this":[95],"GPU":[99],"architecture,":[100],"can":[102],"achieve":[103],"high":[104],"efficiency":[105],"even":[106],"for":[107,115],"coarsest":[109],"grids.":[111],"Results":[112],"are":[113],"presented":[114],"Wilson-Clover":[117],"discretization,":[118],"where":[119],"up":[122],"10x":[124],"speedup":[125],"over":[126],"present":[127],"state-of-the-art":[128],"GPU-accelerated":[129],"methods":[130],"Titan.":[132],"Finally,":[133],"look":[135],"future,":[138],"consider":[140],"software":[142],"implications":[143],"our":[145],"findings.":[146]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
