{"id":"https://openalex.org/W2061781302","doi":"https://doi.org/10.1145/2132876.2132885","title":"High performance matrix inversion based on LU factorization for multicore architectures","display_name":"High performance matrix inversion based on LU factorization for multicore architectures","publication_year":2011,"publication_date":"2011-11-14","ids":{"openalex":"https://openalex.org/W2061781302","doi":"https://doi.org/10.1145/2132876.2132885","mag":"2061781302"},"language":"en","primary_location":{"id":"doi:10.1145/2132876.2132885","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2132876.2132885","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2011 ACM international workshop on Many task computing on grids and supercomputers","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075517045","display_name":"Jack Dongarra","orcid":"https://orcid.org/0000-0003-3247-1782"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]},{"id":"https://openalex.org/I75027704","display_name":"University of Tennessee at Knoxville","ror":"https://ror.org/020f3ap87","country_code":"US","type":"education","lineage":["https://openalex.org/I75027704"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jack Dongarra","raw_affiliation_strings":["University of Tennessee &amp; Oak Ridge National Laboratory &amp; University of Manchester, Knoxville, TN, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Tennessee &amp; Oak Ridge National Laboratory &amp; University of Manchester, Knoxville, TN, USA","institution_ids":["https://openalex.org/I75027704","https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034603717","display_name":"Mathieu Faverge","orcid":"https://orcid.org/0000-0002-2128-1230"},"institutions":[{"id":"https://openalex.org/I75027704","display_name":"University of Tennessee at Knoxville","ror":"https://ror.org/020f3ap87","country_code":"US","type":"education","lineage":["https://openalex.org/I75027704"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mathieu Faverge","raw_affiliation_strings":["University of Tennessee, Knoxville, TN, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Tennessee, Knoxville, TN, USA","institution_ids":["https://openalex.org/I75027704"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017526753","display_name":"Hatem Ltaief","orcid":"https://orcid.org/0000-0002-6897-1095"},"institutions":[{"id":"https://openalex.org/I71920554","display_name":"King Abdullah University of Science and Technology","ror":"https://ror.org/01q3tbs38","country_code":"SA","type":"education","lineage":["https://openalex.org/I71920554"]}],"countries":["SA"],"is_corresponding":false,"raw_author_name":"Hatem Ltaief","raw_affiliation_strings":["KAUST Supercomputing Laboratory, Thuwal, Saudi Arabia","[KAUST Supercomputing Laboratory, Thuwal, Saudi Arabia]"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"KAUST Supercomputing Laboratory, Thuwal, Saudi Arabia","institution_ids":["https://openalex.org/I71920554"]},{"raw_affiliation_string":"[KAUST Supercomputing Laboratory, Thuwal, Saudi Arabia]","institution_ids":["https://openalex.org/I71920554"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5073990539","display_name":"Piotr \u0141uszczek","orcid":"https://orcid.org/0000-0002-0089-6965"},"institutions":[{"id":"https://openalex.org/I75027704","display_name":"University of Tennessee at Knoxville","ror":"https://ror.org/020f3ap87","country_code":"US","type":"education","lineage":["https://openalex.org/I75027704"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Piotr Luszczek","raw_affiliation_strings":["University of Tennessee, Knoxville, TN, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Tennessee, Knoxville, TN, USA","institution_ids":["https://openalex.org/I75027704"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.5763,"has_fulltext":false,"cited_by_count":28,"citation_normalized_percentile":{"value":0.9007732,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"33","last_page":"42"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10792","display_name":"Matrix Theory and Algorithms","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9926000237464905,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8045317530632019},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7631708383560181},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.518409013748169},{"id":"https://openalex.org/keywords/lu-decomposition","display_name":"LU decomposition","score":0.5039476752281189},{"id":"https://openalex.org/keywords/inversion","display_name":"Inversion (geology)","score":0.4825596213340759},{"id":"https://openalex.org/keywords/matrix-decomposition","display_name":"Matrix decomposition","score":0.4739632308483124},{"id":"https://openalex.org/keywords/matrix","display_name":"Matrix (chemical analysis)","score":0.4217669665813446},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.41639888286590576},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3742150068283081}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8045317530632019},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7631708383560181},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.518409013748169},{"id":"https://openalex.org/C123213974","wikidata":"https://www.wikidata.org/wiki/Q833089","display_name":"LU decomposition","level":4,"score":0.5039476752281189},{"id":"https://openalex.org/C1893757","wikidata":"https://www.wikidata.org/wiki/Q3653001","display_name":"Inversion (geology)","level":3,"score":0.4825596213340759},{"id":"https://openalex.org/C42355184","wikidata":"https://www.wikidata.org/wiki/Q1361088","display_name":"Matrix decomposition","level":3,"score":0.4739632308483124},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.4217669665813446},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.41639888286590576},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3742150068283081},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C109007969","wikidata":"https://www.wikidata.org/wiki/Q749565","display_name":"Structural basin","level":2,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/2132876.2132885","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2132876.2132885","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2011 ACM international workshop on Many task computing on grids and supercomputers","raw_type":"proceedings-article"},{"id":"pmh:oai:repository.kaust.edu.sa:10754/575750","is_oa":false,"landing_page_url":"http://hdl.handle.net/10754/575750","pdf_url":null,"source":{"id":"https://openalex.org/S4306401596","display_name":"King Abdullah University of Science and Technology Repository (King Abdullah University of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I71920554","host_organization_name":"King Abdullah University of Science and Technology","host_organization_lineage":["https://openalex.org/I71920554"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference Paper"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.8899999856948853,"id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W94439627","https://openalex.org/W1494445573","https://openalex.org/W1528905581","https://openalex.org/W1689373798","https://openalex.org/W1781664937","https://openalex.org/W1964262399","https://openalex.org/W1983035190","https://openalex.org/W2002257715","https://openalex.org/W2006384005","https://openalex.org/W2018843852","https://openalex.org/W2020165759","https://openalex.org/W2024683548","https://openalex.org/W2076318713","https://openalex.org/W2076441952","https://openalex.org/W2077760421","https://openalex.org/W2084727637","https://openalex.org/W2088307891","https://openalex.org/W2088547189","https://openalex.org/W2103803358","https://openalex.org/W2110147698","https://openalex.org/W2117293168","https://openalex.org/W2119239390","https://openalex.org/W2120048671","https://openalex.org/W2122747952","https://openalex.org/W2148551624","https://openalex.org/W2151587416","https://openalex.org/W2151769723","https://openalex.org/W2154389097","https://openalex.org/W2168612748","https://openalex.org/W2319885746","https://openalex.org/W2405076432","https://openalex.org/W2733499314","https://openalex.org/W2790374560","https://openalex.org/W2794413559","https://openalex.org/W2798909945","https://openalex.org/W2870727505","https://openalex.org/W2903760723","https://openalex.org/W2946724226","https://openalex.org/W4229666556","https://openalex.org/W4230115272","https://openalex.org/W4236558082","https://openalex.org/W4301861531","https://openalex.org/W6641311335","https://openalex.org/W6675580387","https://openalex.org/W6677560469","https://openalex.org/W6993143405"],"related_works":["https://openalex.org/W2030992542","https://openalex.org/W1988437637","https://openalex.org/W2898722594","https://openalex.org/W1977700955","https://openalex.org/W2344713096","https://openalex.org/W4388311419","https://openalex.org/W2113856999","https://openalex.org/W4391266752","https://openalex.org/W4318969829","https://openalex.org/W133838137"],"abstract_inverted_index":{"The":[0,24,191],"goal":[1],"of":[2,11,16,53,87,124,152,226,232],"this":[3],"paper":[4],"is":[5,27,91,100,178,245],"to":[6,147,160,165,180],"present":[7],"an":[8,12,81],"efficient":[9,248],"implementation":[10,144,199,244],"explicit":[13],"matrix":[14,56,75,125,197,231],"inversion":[15,25,198],"general":[17],"square":[18],"matrices":[19],"on":[20,63,102,186,215],"multicore":[21],"computer":[22],"architecture.":[23],"procedure":[26],"split":[28],"into":[29,93],"four":[30,89,221],"steps:":[31],"1)":[32],"computing":[33],"the":[34,39,54,64,74,77,85,88,103,107,117,129,150,153,167,187,202,224],"LU":[35],"factorization,":[36],"2)":[37],"inverting":[38],"upper":[40],"triangular":[41],"U":[42],"factor,":[43],"3)":[44],"solving":[45],"a":[46,68,216,230],"linear":[47],"system,":[48],"whose":[49],"solution":[50],"yields":[51],"inverse":[52],"original":[55],"and":[57,115,164,212,223,249],"4)":[58],"applying":[59],"backward":[60],"column":[61],"pivoting":[62],"inverted":[65],"matrix.":[66],"Using":[67],"tile":[69],"data":[70,109,118],"layout,":[71],"which":[72,105,139],"represents":[73,106],"in":[76,128,142,145],"system":[78,175],"memory":[79],"with":[80,169,220],"optimized":[82],"cache-aware":[83],"format,":[84],"computation":[86],"steps":[90],"decomposed":[92],"computational":[94],"tasks.":[95],"A":[96,172,235],"directed":[97],"acyclic":[98],"graph":[99],"generated":[101],"fly":[104],"program":[108],"flow.":[110],"Its":[111],"nodes":[112],"represent":[113],"tasks":[114,168],"edges":[116],"dependencies":[119],"between":[120],"them.":[121],"Previous":[122],"implementations":[123],"inversions,":[126],"available":[127,188],"state-of-the-art":[130,203],"numerical":[131,184,204],"libraries,":[132],"are":[133,140],"suffer":[134],"from":[135,194],"unnecessary":[136],"synchronization":[137],"points,":[138],"non-existent":[141],"our":[143,183,195,241],"order":[146],"fully":[148],"exploit":[149],"parallelism":[151],"underlying":[154],"hardware.":[155],"Our":[156],"algorithmic":[157],"approach":[158],"allows":[159],"remove":[161],"these":[162],"bottlenecks":[163],"execute":[166],"loose":[170],"synchronization.":[171],"runtime":[173],"environment":[174],"called":[176],"QUARK":[177],"necessary":[179],"dynamically":[181],"schedule":[182],"kernels":[185],"processing":[189],"units.":[190],"reported":[192],"results":[193],"LU-based":[196],"significantly":[200],"outperform":[201],"libraries":[205],"such":[206],"as":[207],"LAPACK":[208],"(5x),":[209],"MKL":[210],"(5x)":[211],"ScaLAPACK":[213],"(2.5x)":[214],"contemporary":[217],"AMD":[218],"platform":[219],"sockets":[222],"total":[225],"48":[227],"cores":[228],"for":[229],"size":[233],"24000.":[234],"power":[236,253],"consumption":[237],"analysis":[238],"shows":[239],"that":[240],"high":[242],"performance":[243],"also":[246],"energy":[247],"substantially":[250],"consumes":[251],"less":[252],"than":[254],"its":[255],"competitors.":[256]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":3},{"year":2015,"cited_by_count":5},{"year":2014,"cited_by_count":3},{"year":2013,"cited_by_count":4},{"year":2012,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
