{"id":"https://openalex.org/W2765368488","doi":"https://doi.org/10.1109/hpec.2017.8091023","title":"Performance challenges for heterogeneous distributed tensor decompositions","display_name":"Performance challenges for heterogeneous distributed tensor decompositions","publication_year":2017,"publication_date":"2017-09-01","ids":{"openalex":"https://openalex.org/W2765368488","doi":"https://doi.org/10.1109/hpec.2017.8091023","mag":"2765368488"},"language":"en","primary_location":{"id":"doi:10.1109/hpec.2017.8091023","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec.2017.8091023","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5026169135","display_name":"Thomas B. Rolinger","orcid":"https://orcid.org/0000-0001-8383-4737"},"institutions":[{"id":"https://openalex.org/I4210113003","display_name":"Physical Sciences (United States)","ror":"https://ror.org/021qvjc46","country_code":"US","type":"company","lineage":["https://openalex.org/I4210113003"]},{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Thomas B. Rolinger","raw_affiliation_strings":["Laboratory for Physical Sciences, University of Maryland, College Park, MD"],"affiliations":[{"raw_affiliation_string":"Laboratory for Physical Sciences, University of Maryland, College Park, MD","institution_ids":["https://openalex.org/I66946132","https://openalex.org/I4210113003"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051496784","display_name":"Tyler A. Simon","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113003","display_name":"Physical Sciences (United States)","ror":"https://ror.org/021qvjc46","country_code":"US","type":"company","lineage":["https://openalex.org/I4210113003"]},{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tyler A. Simon","raw_affiliation_strings":["Laboratory for Physical Sciences, University of Maryland, College Park, MD"],"affiliations":[{"raw_affiliation_string":"Laboratory for Physical Sciences, University of Maryland, College Park, MD","institution_ids":["https://openalex.org/I66946132","https://openalex.org/I4210113003"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033604917","display_name":"Christopher D. Krieger","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113003","display_name":"Physical Sciences (United States)","ror":"https://ror.org/021qvjc46","country_code":"US","type":"company","lineage":["https://openalex.org/I4210113003"]},{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christopher D. Krieger","raw_affiliation_strings":["Laboratory for Physical Sciences, University of Maryland, College Park, MD"],"affiliations":[{"raw_affiliation_string":"Laboratory for Physical Sciences, University of Maryland, College Park, MD","institution_ids":["https://openalex.org/I66946132","https://openalex.org/I4210113003"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5026169135"],"corresponding_institution_ids":["https://openalex.org/I4210113003","https://openalex.org/I66946132"],"apc_list":null,"apc_paid":null,"fwci":0.4612,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.58280255,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9283999800682068,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.827115535736084},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.723566472530365},{"id":"https://openalex.org/keywords/matrix-decomposition","display_name":"Matrix decomposition","score":0.5856775045394897},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.5774329900741577},{"id":"https://openalex.org/keywords/tensor","display_name":"Tensor (intrinsic definition)","score":0.5751429796218872},{"id":"https://openalex.org/keywords/lu-decomposition","display_name":"LU decomposition","score":0.552011251449585},{"id":"https://openalex.org/keywords/decomposition","display_name":"Decomposition","score":0.5065325498580933},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.49259811639785767},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.47624731063842773},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.4454458951950073},{"id":"https://openalex.org/keywords/double-precision-floating-point-format","display_name":"Double-precision floating-point format","score":0.4437461197376251},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.43850529193878174},{"id":"https://openalex.org/keywords/cholesky-decomposition","display_name":"Cholesky decomposition","score":0.4284239411354065},{"id":"https://openalex.org/keywords/distributed-memory","display_name":"Distributed memory","score":0.4198927879333496},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.408651202917099},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.3764583468437195},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.29051363468170166},{"id":"https://openalex.org/keywords/shared-memory","display_name":"Shared memory","score":0.192030131816864},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.13967254757881165},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.1324048638343811}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.827115535736084},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.723566472530365},{"id":"https://openalex.org/C42355184","wikidata":"https://www.wikidata.org/wiki/Q1361088","display_name":"Matrix decomposition","level":3,"score":0.5856775045394897},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.5774329900741577},{"id":"https://openalex.org/C155281189","wikidata":"https://www.wikidata.org/wiki/Q3518150","display_name":"Tensor (intrinsic definition)","level":2,"score":0.5751429796218872},{"id":"https://openalex.org/C123213974","wikidata":"https://www.wikidata.org/wiki/Q833089","display_name":"LU decomposition","level":4,"score":0.552011251449585},{"id":"https://openalex.org/C124681953","wikidata":"https://www.wikidata.org/wiki/Q339062","display_name":"Decomposition","level":2,"score":0.5065325498580933},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.49259811639785767},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.47624731063842773},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.4454458951950073},{"id":"https://openalex.org/C35912277","wikidata":"https://www.wikidata.org/wiki/Q1243369","display_name":"Double-precision floating-point format","level":3,"score":0.4437461197376251},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.43850529193878174},{"id":"https://openalex.org/C34727166","wikidata":"https://www.wikidata.org/wiki/Q515375","display_name":"Cholesky decomposition","level":3,"score":0.4284239411354065},{"id":"https://openalex.org/C91481028","wikidata":"https://www.wikidata.org/wiki/Q1054686","display_name":"Distributed memory","level":3,"score":0.4198927879333496},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.408651202917099},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.3764583468437195},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.29051363468170166},{"id":"https://openalex.org/C133875982","wikidata":"https://www.wikidata.org/wiki/Q764810","display_name":"Shared memory","level":2,"score":0.192030131816864},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.13967254757881165},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.1324048638343811},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.0},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec.2017.8091023","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec.2017.8091023","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W123932431","https://openalex.org/W1511885491","https://openalex.org/W1768849904","https://openalex.org/W2013938784","https://openalex.org/W2024165284","https://openalex.org/W2025890876","https://openalex.org/W2040230751","https://openalex.org/W2079487069","https://openalex.org/W2103392911","https://openalex.org/W2112400467","https://openalex.org/W2112487361","https://openalex.org/W2155125497","https://openalex.org/W2159481344","https://openalex.org/W2245094585","https://openalex.org/W2558367073","https://openalex.org/W2561504423","https://openalex.org/W3000120212","https://openalex.org/W3199767607","https://openalex.org/W4239517679","https://openalex.org/W6676820125","https://openalex.org/W6802099551"],"related_works":["https://openalex.org/W4224251700","https://openalex.org/W2526784484","https://openalex.org/W1509943448","https://openalex.org/W2100843445","https://openalex.org/W2025695688","https://openalex.org/W3149476094","https://openalex.org/W4246966070","https://openalex.org/W2127144731","https://openalex.org/W1894894874","https://openalex.org/W2386003753"],"abstract_inverted_index":{"Tensor":[0],"decompositions,":[1],"which":[2],"are":[3,8,36],"factorizations":[4],"of":[5,49,59,62,92,97,126,157,161,182],"multi-dimensional":[6],"arrays,":[7],"becoming":[9],"increasingly":[10],"important":[11],"in":[12],"large-scale":[13],"data":[14],"analytics.":[15],"A":[16],"popular":[17],"tensor":[18,74,164],"decomposition":[19,50,75],"algorithm":[20],"is":[21,187],"Canonical":[22],"Decomposition/Parallel":[23],"Factorization":[24],"using":[25,129,185,195],"alternating":[26],"least":[27],"squares":[28],"fitting":[29],"(CP-ALS).":[30],"Tensors":[31],"that":[32,55,141,174],"model":[33],"real-world":[34],"applications":[35],"often":[37],"very":[38],"large":[39],"and":[40,114,134],"sparse,":[41],"driving":[42],"the":[43,89,124,150,158,168,180],"need":[44],"for":[45,149],"high":[46],"performance":[47,125,159],"implementations":[48],"algorithms,":[51],"such":[52],"as":[53],"CP-ALS,":[54],"can":[56],"take":[57],"advantage":[58],"many":[60],"types":[61],"compute":[63],"resources.":[64],"In":[65],"this":[66],"work":[67],"we":[68,153,170],"present":[69],"ReFacTo,":[70],"a":[71,95,106,155],"heterogeneous":[72,162],"distributed":[73,82,163],"implementation":[76,140],"based":[77,166],"on":[78,167,175],"DeFacTo,":[79],"an":[80,138],"existing":[81],"memory":[83],"approach":[84],"to":[85,94,110,118,137,177,189],"CP-ALS.":[86],"DFacTo":[87],"reduces":[88],"critical":[90],"routine":[91],"CP-ALS":[93],"series":[96],"sparse":[98],"matrix-vector":[99],"multiplications":[100],"(SpMVs).":[101],"ReFacTo":[102,127,183,193],"leverages":[103],"GPUs":[104],"within":[105],"cluster":[107],"via":[108],"MPI":[109],"perform":[111],"these":[112],"SpMVs":[113],"uses":[115,142],"OpenMP":[116],"threads":[117],"parallelize":[119],"other":[120],"routines.":[121],"We":[122,172],"evaluate":[123],"when":[128,184,194],"NVIDIA's":[130],"GPU-based":[131],"cuSPARSE":[132],"library":[133],"compare":[135],"it":[136],"alternative":[139],"Intel's":[143],"CPU-based":[144],"Math":[145],"Kernel":[146],"Library":[147],"(MKL)":[148],"SpMV.":[151],"Furthermore,":[152],"provide":[154],"discussion":[156],"challenges":[160],"decompositions":[165],"results":[169],"observed.":[171],"find":[173],"up":[176,188],"32":[178],"nodes,":[179],"SpMV":[181],"MKL":[186],"6.8\u00d7":[190],"faster":[191],"than":[192],"cuSPARSE.":[196]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
