{"id":"https://openalex.org/W2169150754","doi":"https://doi.org/10.1109/ipdpsw.2010.5470941","title":"Dense linear algebra solvers for multicore with GPU accelerators","display_name":"Dense linear algebra solvers for multicore with GPU accelerators","publication_year":2010,"publication_date":"2010-04-01","ids":{"openalex":"https://openalex.org/W2169150754","doi":"https://doi.org/10.1109/ipdpsw.2010.5470941","mag":"2169150754"},"language":"en","primary_location":{"id":"doi:10.1109/ipdpsw.2010.5470941","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipdpsw.2010.5470941","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2010 IEEE International Symposium on Parallel &amp; Distributed Processing, Workshops and Phd Forum (IPDPSW)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083604741","display_name":"Stanimire Tomov","orcid":"https://orcid.org/0000-0002-5937-7959"},"institutions":[{"id":"https://openalex.org/I75027704","display_name":"University of Tennessee at Knoxville","ror":"https://ror.org/020f3ap87","country_code":"US","type":"education","lineage":["https://openalex.org/I75027704"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Stanimire Tomov","raw_affiliation_strings":["Department of Electrical Engineering and Computer Science, University of Tennessee, Knoxville, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Science, University of Tennessee, Knoxville, USA","institution_ids":["https://openalex.org/I75027704"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109721685","display_name":"Rajib Nath","orcid":null},"institutions":[{"id":"https://openalex.org/I75027704","display_name":"University of Tennessee at Knoxville","ror":"https://ror.org/020f3ap87","country_code":"US","type":"education","lineage":["https://openalex.org/I75027704"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rajib Nath","raw_affiliation_strings":["Department of Electrical Engineering and Computer Science, University of Tennessee, Knoxville, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Science, University of Tennessee, Knoxville, USA","institution_ids":["https://openalex.org/I75027704"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017526753","display_name":"Hatem Ltaief","orcid":"https://orcid.org/0000-0002-6897-1095"},"institutions":[{"id":"https://openalex.org/I75027704","display_name":"University of Tennessee at Knoxville","ror":"https://ror.org/020f3ap87","country_code":"US","type":"education","lineage":["https://openalex.org/I75027704"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hatem Ltaief","raw_affiliation_strings":["Department of Electrical Engineering and Computer Science, University of Tennessee, Knoxville, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Science, University of Tennessee, Knoxville, USA","institution_ids":["https://openalex.org/I75027704"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5075517045","display_name":"Jack Dongarra","orcid":"https://orcid.org/0000-0003-3247-1782"},"institutions":[{"id":"https://openalex.org/I75027704","display_name":"University of Tennessee at Knoxville","ror":"https://ror.org/020f3ap87","country_code":"US","type":"education","lineage":["https://openalex.org/I75027704"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jack Dongarra","raw_affiliation_strings":["Department of Electrical Engineering and Computer Science, University of Tennessee, Knoxville, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Science, University of Tennessee, Knoxville, USA","institution_ids":["https://openalex.org/I75027704"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I75027704"],"apc_list":null,"apc_paid":null,"fwci":24.8049,"has_fulltext":false,"cited_by_count":251,"citation_normalized_percentile":{"value":0.99711842,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11697","display_name":"Numerical Methods and Algorithms","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10792","display_name":"Matrix Theory and Algorithms","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8148015737533569},{"id":"https://openalex.org/keywords/linear-algebra","display_name":"Linear algebra","score":0.7205042243003845},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7039705514907837},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.6909099221229553},{"id":"https://openalex.org/keywords/cholesky-decomposition","display_name":"Cholesky decomposition","score":0.6738014817237854},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.6109948754310608},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.581076979637146},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.4881792664527893},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.47873303294181824},{"id":"https://openalex.org/keywords/xeon-phi","display_name":"Xeon Phi","score":0.4410167336463928},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.42840200662612915},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.24999850988388062},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11362636089324951},{"id":"https://openalex.org/keywords/eigenvalues-and-eigenvectors","display_name":"Eigenvalues and eigenvectors","score":0.09025764465332031}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8148015737533569},{"id":"https://openalex.org/C139352143","wikidata":"https://www.wikidata.org/wiki/Q82571","display_name":"Linear algebra","level":2,"score":0.7205042243003845},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7039705514907837},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.6909099221229553},{"id":"https://openalex.org/C34727166","wikidata":"https://www.wikidata.org/wiki/Q515375","display_name":"Cholesky decomposition","level":3,"score":0.6738014817237854},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.6109948754310608},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.581076979637146},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.4881792664527893},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.47873303294181824},{"id":"https://openalex.org/C96972482","wikidata":"https://www.wikidata.org/wiki/Q1049168","display_name":"Xeon Phi","level":2,"score":0.4410167336463928},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.42840200662612915},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.24999850988388062},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11362636089324951},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.09025764465332031},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/ipdpsw.2010.5470941","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipdpsw.2010.5470941","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2010 IEEE International Symposium on Parallel &amp; Distributed Processing, Workshops and Phd Forum (IPDPSW)","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.157.7245","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.157.7245","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://icl.cs.utk.edu/news_pub/submissions/magma_solvers.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.416.6926","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.416.6926","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://web.eecs.utk.edu/~library/TechReports/2009/ut-cs-09-649.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.421.6076","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.421.6076","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.netlib.org/utk/people/JackDongarra/PAPERS/magma_solvers.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6200000047683716,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320309480","display_name":"Nvidia","ror":"https://ror.org/03jdj4y14"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W1558138778","https://openalex.org/W1606525066","https://openalex.org/W1863336885","https://openalex.org/W2063186542","https://openalex.org/W2107410315","https://openalex.org/W2124480634","https://openalex.org/W2145960411","https://openalex.org/W2162322364","https://openalex.org/W2170611190","https://openalex.org/W2219494251","https://openalex.org/W3141650078","https://openalex.org/W6633456370","https://openalex.org/W6635988513","https://openalex.org/W6639351857","https://openalex.org/W6683941059","https://openalex.org/W6688660216"],"related_works":["https://openalex.org/W1963859303","https://openalex.org/W2364044215","https://openalex.org/W2389600408","https://openalex.org/W240129890","https://openalex.org/W3048701459","https://openalex.org/W2149078538","https://openalex.org/W2370314112","https://openalex.org/W1912958759","https://openalex.org/W2792081825","https://openalex.org/W2893308117"],"abstract_inverted_index":{"Solving":[0],"dense":[1,58],"linear":[2,31,59],"systems":[3,17,32],"of":[4,21,33,50,57,99,162],"equations":[5,34],"is":[6,91],"a":[7,107],"fundamental":[8],"problem":[9],"in":[10,19,54,81,96],"scientific":[11],"computing.":[12],"Numerical":[13],"simulations":[14],"involving":[15],"complex":[16],"represented":[18],"terms":[20],"unknown":[22],"variables":[23],"and":[24,84,102,112,122,124,134,155,188],"relations":[25],"between":[26],"them":[27],"often":[28],"lead":[29,159],"to":[30,70,73,160,167],"that":[35,175],"must":[36],"be":[37],"solved":[38],"as":[39,41,140,142],"fast":[40],"possible.":[42],"We":[43,67,105],"describe":[44,68],"current":[45],"efforts":[46],"toward":[47],"the":[48,55,76,97,152,168,192],"development":[49],"these":[51,82],"critical":[52],"solvers":[53,72,139,185],"area":[56],"algebra":[60],"(DLA)":[61],"for":[62,120,126,137],"multicore":[63,173],"with":[64],"GPU":[65,179],"accelerators.":[66,180],"how":[69],"code/develop":[71],"effectively":[74],"use":[75,106],"high":[77],"computing":[78],"power":[79],"available":[80,190],"new":[83,148],"emerging":[85],"hybrid":[86],"architectures.":[87],"The":[88,147,181],"approach":[89],"taken":[90],"based":[92],"on":[93,151,171],"hybridization":[94],"techniques":[95],"context":[98],"Cholesky,":[100],"LU,":[101],"QR":[103],"factorizations.":[104],"high-level":[108],"parallel":[109],"programming":[110],"model":[111],"leverage":[113],"existing":[114],"software":[115],"infrastructure,":[116],"e.g.":[117],"optimized":[118],"BLAS":[119],"CPU":[121,128],"GPU,":[123],"LAPACK":[125],"sequential":[127],"processing.":[129],"Included":[130],"also":[131],"are":[132,186],"architecture":[133],"algorithm-specific":[135],"optimizations":[136],"standard":[138,172],"well":[141],"mixed-precision":[143],"iterative":[144],"refinement":[145],"solvers.":[146],"algorithms,":[149],"depending":[150],"hardware":[153],"configuration":[154],"routine":[156],"parameters,":[157],"can":[158],"orders":[161],"magnitude":[163],"acceleration":[164],"when":[165],"compared":[166],"same":[169],"algorithms":[170],"architectures":[174],"do":[176],"not":[177],"contain":[178],"newly":[182],"developed":[183],"DLA":[184],"integrated":[187],"freely":[189],"through":[191],"MAGMA":[193],"library.":[194]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":9},{"year":2022,"cited_by_count":7},{"year":2021,"cited_by_count":7},{"year":2020,"cited_by_count":13},{"year":2019,"cited_by_count":11},{"year":2018,"cited_by_count":23},{"year":2017,"cited_by_count":20},{"year":2016,"cited_by_count":11},{"year":2015,"cited_by_count":19},{"year":2014,"cited_by_count":23},{"year":2013,"cited_by_count":23},{"year":2012,"cited_by_count":30}],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2025-10-10T00:00:00"}
