{"id":"https://openalex.org/W1985263109","doi":"https://doi.org/10.1137/110838844","title":"Exposing Fine-Grained Parallelism in Algebraic Multigrid Methods","display_name":"Exposing Fine-Grained Parallelism in Algebraic Multigrid Methods","publication_year":2012,"publication_date":"2012-01-01","ids":{"openalex":"https://openalex.org/W1985263109","doi":"https://doi.org/10.1137/110838844","mag":"1985263109"},"language":"en","primary_location":{"id":"doi:10.1137/110838844","is_oa":false,"landing_page_url":"https://doi.org/10.1137/110838844","pdf_url":null,"source":{"id":"https://openalex.org/S165512578","display_name":"SIAM Journal on Scientific Computing","issn_l":"1064-8275","issn":["1064-8275","1095-7197"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320508","host_organization_name":"Society for Industrial and Applied Mathematics","host_organization_lineage":["https://openalex.org/P4310320508"],"host_organization_lineage_names":["Society for Industrial and Applied Mathematics"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"SIAM Journal on Scientific Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5108095852","display_name":"Nathan Bell","orcid":null},"institutions":[{"id":"https://openalex.org/I1304085615","display_name":"Nvidia (United Kingdom)","ror":"https://ror.org/02kr42612","country_code":"GB","type":"company","lineage":["https://openalex.org/I1304085615","https://openalex.org/I4210127875"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Nathan Bell","raw_affiliation_strings":["Nvidia"],"affiliations":[{"raw_affiliation_string":"Nvidia","institution_ids":["https://openalex.org/I1304085615"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112582638","display_name":"Steven Dalton","orcid":null},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Steven Dalton","raw_affiliation_strings":["University of Illinois at Urbana Champaign"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana Champaign","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5027808971","display_name":"Luke N. Olson","orcid":"https://orcid.org/0000-0002-5283-6104"},"institutions":[{"id":"https://openalex.org/I19908199","display_name":"National Center for Supercomputing Applications","ror":"https://ror.org/03g9ch715","country_code":"BG","type":"facility","lineage":["https://openalex.org/I19908199"]}],"countries":["BG"],"is_corresponding":false,"raw_author_name":"Luke N. Olson","raw_affiliation_strings":["National Center for Supercomputing Applications (NCSA)"],"affiliations":[{"raw_affiliation_string":"National Center for Supercomputing Applications (NCSA)","institution_ids":["https://openalex.org/I19908199"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5108095852"],"corresponding_institution_ids":["https://openalex.org/I1304085615"],"apc_list":null,"apc_paid":null,"fwci":20.7399,"has_fulltext":false,"cited_by_count":205,"citation_normalized_percentile":{"value":0.99719787,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"34","issue":"4","first_page":"C123","last_page":"C152"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9933000206947327,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/multigrid-method","display_name":"Multigrid method","score":0.901613712310791},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.8237196803092957},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7812559604644775},{"id":"https://openalex.org/keywords/massively-parallel","display_name":"Massively parallel","score":0.686409056186676},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6386171579360962},{"id":"https://openalex.org/keywords/solver","display_name":"Solver","score":0.5452301502227783},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5345956683158875},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.4694131314754486},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.4470880329608917},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.4379901885986328},{"id":"https://openalex.org/keywords/data-parallelism","display_name":"Data parallelism","score":0.4200972020626068},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1571594774723053},{"id":"https://openalex.org/keywords/partial-differential-equation","display_name":"Partial differential equation","score":0.06879931688308716}],"concepts":[{"id":"https://openalex.org/C137119250","wikidata":"https://www.wikidata.org/wiki/Q1413101","display_name":"Multigrid method","level":3,"score":0.901613712310791},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.8237196803092957},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7812559604644775},{"id":"https://openalex.org/C190475519","wikidata":"https://www.wikidata.org/wiki/Q544384","display_name":"Massively parallel","level":2,"score":0.686409056186676},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6386171579360962},{"id":"https://openalex.org/C2778770139","wikidata":"https://www.wikidata.org/wiki/Q1966904","display_name":"Solver","level":2,"score":0.5452301502227783},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5345956683158875},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.4694131314754486},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.4470880329608917},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.4379901885986328},{"id":"https://openalex.org/C61483411","wikidata":"https://www.wikidata.org/wiki/Q3124522","display_name":"Data parallelism","level":3,"score":0.4200972020626068},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1571594774723053},{"id":"https://openalex.org/C93779851","wikidata":"https://www.wikidata.org/wiki/Q271977","display_name":"Partial differential equation","level":2,"score":0.06879931688308716},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1137/110838844","is_oa":false,"landing_page_url":"https://doi.org/10.1137/110838844","pdf_url":null,"source":{"id":"https://openalex.org/S165512578","display_name":"SIAM Journal on Scientific Computing","issn_l":"1064-8275","issn":["1064-8275","1095-7197"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320508","host_organization_name":"Society for Industrial and Applied Mathematics","host_organization_lineage":["https://openalex.org/P4310320508"],"host_organization_lineage_names":["Society for Industrial and Applied Mathematics"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"SIAM Journal on Scientific Computing","raw_type":"journal-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.706.4071","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.706.4071","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://wnbell.com/media/2012-SISC-AMG/GPU-AMG-SISC.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.707.1606","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.707.1606","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://wnbell.com/media/2011-06-NVR-AMG/nvr-2011-002.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W1884140786","https://openalex.org/W1975359839","https://openalex.org/W1988425770","https://openalex.org/W1990832096","https://openalex.org/W2006382715","https://openalex.org/W2017086619","https://openalex.org/W2019190770","https://openalex.org/W2025846466","https://openalex.org/W2032718616","https://openalex.org/W2039789965","https://openalex.org/W2053486661","https://openalex.org/W2063597636","https://openalex.org/W2072662440","https://openalex.org/W2083221501","https://openalex.org/W2089437293","https://openalex.org/W2100061495","https://openalex.org/W2100858639","https://openalex.org/W2112708371","https://openalex.org/W2119547137","https://openalex.org/W2124007994","https://openalex.org/W2128853364","https://openalex.org/W2131429735","https://openalex.org/W2132958419","https://openalex.org/W2142304406","https://openalex.org/W2142356482","https://openalex.org/W2154118576","https://openalex.org/W2173213060","https://openalex.org/W2477249138","https://openalex.org/W2498157954","https://openalex.org/W2536303343","https://openalex.org/W2739266613","https://openalex.org/W2763753778","https://openalex.org/W2915344229","https://openalex.org/W2997945685","https://openalex.org/W3087399931","https://openalex.org/W3185776723","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W2032163384","https://openalex.org/W2574930369","https://openalex.org/W1554644772","https://openalex.org/W2003935582","https://openalex.org/W2950520577","https://openalex.org/W3209384898","https://openalex.org/W74409296","https://openalex.org/W1595834484","https://openalex.org/W2089690534","https://openalex.org/W3170887803"],"abstract_inverted_index":{"Algebraic":[0],"multigrid":[1,37,63,76],"methods":[2,38],"for":[3,18,28],"large,":[4],"sparse":[5],"linear":[6],"systems":[7],"are":[8,21,88,97],"a":[9,60,125],"necessity":[10],"in":[11,70,90,112,118],"many":[12],"computational":[13],"simulations,":[14],"yet":[15],"parallel":[16,41,61,94],"algorithms":[17,50,87],"such":[19,44],"solvers":[20],"generally":[22],"decomposed":[23],"into":[24],"coarse-grained":[25],"tasks":[26],"suitable":[27],"distributed":[29],"computers":[30],"with":[31,51],"traditional":[32],"processing":[33,47],"cores.":[34],"However,":[35],"accelerating":[36],"on":[39,100],"massively":[40],"throughput-oriented":[42],"processors,":[43],"as":[45,78,80],"graphics":[46],"units,":[48],"demands":[49],"abundant":[52],"fine-grained":[53,68],"parallelism.":[54],"In":[55],"this":[56],"paper,":[57],"we":[58],"develop":[59],"algebraic":[62],"method":[64],"which":[65],"exposes":[66],"substantial":[67],"parallelism":[69],"both":[71],"the":[72,75,81,101,113,119],"construction":[73],"of":[74,92,110],"hierarchy":[77],"well":[79],"cycling":[82,120],"or":[83],"solve":[84],"stage.":[85],"Our":[86],"expressed":[89],"terms":[91],"scalable":[93],"primitives":[95],"that":[96],"efficiently":[98],"implemented":[99],"GPU.":[102],"The":[103],"resulting":[104],"solver":[105],"achieves":[106],"an":[107],"average":[108],"speedup":[109],"$1.8\\times$":[111],"setup":[114],"phase":[115,121],"and":[116],"$5.7\\times$":[117],"when":[122],"compared":[123],"to":[124],"representative":[126],"CPU":[127],"implementation.":[128]},"counts_by_year":[{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":15},{"year":2023,"cited_by_count":12},{"year":2022,"cited_by_count":15},{"year":2021,"cited_by_count":18},{"year":2020,"cited_by_count":16},{"year":2019,"cited_by_count":16},{"year":2018,"cited_by_count":12},{"year":2017,"cited_by_count":12},{"year":2016,"cited_by_count":8},{"year":2015,"cited_by_count":25},{"year":2014,"cited_by_count":18},{"year":2013,"cited_by_count":12},{"year":2012,"cited_by_count":16}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
