{"id":"https://openalex.org/W4401408950","doi":"https://doi.org/10.1145/3673038.3673128","title":"Accelerated Constrained Sparse Tensor Factorization on Massively Parallel Architectures","display_name":"Accelerated Constrained Sparse Tensor Factorization on Massively Parallel Architectures","publication_year":2024,"publication_date":"2024-08-08","ids":{"openalex":"https://openalex.org/W4401408950","doi":"https://doi.org/10.1145/3673038.3673128"},"language":"en","primary_location":{"id":"doi:10.1145/3673038.3673128","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3673038.3673128","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3673038.3673128","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 53rd International Conference on Parallel Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3673038.3673128","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065069377","display_name":"Yongseok Soh","orcid":"https://orcid.org/0009-0000-8358-4993"},"institutions":[{"id":"https://openalex.org/I181233156","display_name":"University of Oregon","ror":"https://ror.org/0293rh119","country_code":"US","type":"education","lineage":["https://openalex.org/I181233156"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yongseok Soh","raw_affiliation_strings":["Department of Computer Science, University of Oregon, United States of America"],"raw_orcid":"https://orcid.org/0009-0000-8358-4993","affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Oregon, United States of America","institution_ids":["https://openalex.org/I181233156"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014168997","display_name":"Ramakrishnan Kannan","orcid":"https://orcid.org/0000-0002-5852-4806"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ramakrishnan Kannan","raw_affiliation_strings":["Oak Ridge National Lab, United States of America"],"raw_orcid":"https://orcid.org/0000-0002-5852-4806","affiliations":[{"raw_affiliation_string":"Oak Ridge National Lab, United States of America","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048170299","display_name":"Piyush Sao","orcid":"https://orcid.org/0000-0002-9432-5855"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Piyush Sao","raw_affiliation_strings":["Oak Ridge National Lab, United States of America"],"raw_orcid":"https://orcid.org/0000-0002-9432-5855","affiliations":[{"raw_affiliation_string":"Oak Ridge National Lab, United States of America","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5090972683","display_name":"Jee Choi","orcid":"https://orcid.org/0000-0002-6938-8221"},"institutions":[{"id":"https://openalex.org/I181233156","display_name":"University of Oregon","ror":"https://ror.org/0293rh119","country_code":"US","type":"education","lineage":["https://openalex.org/I181233156"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jee Choi","raw_affiliation_strings":["Department of Computer Science, University of Oregon, United States of America"],"raw_orcid":"https://orcid.org/0000-0002-6938-8221","affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Oregon, United States of America","institution_ids":["https://openalex.org/I181233156"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.10268378,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"107","last_page":"116"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9276999831199646,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.8709149956703186},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8098260164260864},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.8054116368293762},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.7271296977996826},{"id":"https://openalex.org/keywords/massively-parallel","display_name":"Massively parallel","score":0.6185682415962219},{"id":"https://openalex.org/keywords/xeon-phi","display_name":"Xeon Phi","score":0.5920777320861816},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.4595974087715149},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.4555555284023285},{"id":"https://openalex.org/keywords/tensor","display_name":"Tensor (intrinsic definition)","score":0.43643978238105774},{"id":"https://openalex.org/keywords/factorization","display_name":"Factorization","score":0.43540680408477783},{"id":"https://openalex.org/keywords/xeon","display_name":"Xeon","score":0.4213916063308716},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.26259949803352356},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.12461355328559875},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.09619393944740295}],"concepts":[{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.8709149956703186},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8098260164260864},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.8054116368293762},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.7271296977996826},{"id":"https://openalex.org/C190475519","wikidata":"https://www.wikidata.org/wiki/Q544384","display_name":"Massively parallel","level":2,"score":0.6185682415962219},{"id":"https://openalex.org/C96972482","wikidata":"https://www.wikidata.org/wiki/Q1049168","display_name":"Xeon Phi","level":2,"score":0.5920777320861816},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.4595974087715149},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.4555555284023285},{"id":"https://openalex.org/C155281189","wikidata":"https://www.wikidata.org/wiki/Q3518150","display_name":"Tensor (intrinsic definition)","level":2,"score":0.43643978238105774},{"id":"https://openalex.org/C187834632","wikidata":"https://www.wikidata.org/wiki/Q188804","display_name":"Factorization","level":2,"score":0.43540680408477783},{"id":"https://openalex.org/C145108525","wikidata":"https://www.wikidata.org/wiki/Q656154","display_name":"Xeon","level":2,"score":0.4213916063308716},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.26259949803352356},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.12461355328559875},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.09619393944740295},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3673038.3673128","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3673038.3673128","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3673038.3673128","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 53rd International Conference on Parallel Processing","raw_type":"proceedings-article"},{"id":"pmh:oai:osti.gov:2438687","is_oa":true,"landing_page_url":"https://www.osti.gov/biblio/2438687","pdf_url":"https://www.osti.gov/servlets/purl/2438687","source":{"id":"https://openalex.org/S4306402487","display_name":"OSTI OAI (U.S. Department of Energy Office of Scientific and Technical Information)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I139351228","host_organization_name":"Office of Scientific and Technical Information","host_organization_lineage":["https://openalex.org/I139351228"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null}],"best_oa_location":{"id":"doi:10.1145/3673038.3673128","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3673038.3673128","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3673038.3673128","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 53rd International Conference on Parallel Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W1511885491","https://openalex.org/W1850275953","https://openalex.org/W1902027874","https://openalex.org/W2024165284","https://openalex.org/W2026034143","https://openalex.org/W2155125497","https://openalex.org/W2262232454","https://openalex.org/W2294040673","https://openalex.org/W2469230926","https://openalex.org/W2524210471","https://openalex.org/W2616934551","https://openalex.org/W2753916251","https://openalex.org/W2788597588","https://openalex.org/W2885801207","https://openalex.org/W2886539428","https://openalex.org/W2893530532","https://openalex.org/W2897321523","https://openalex.org/W2986686333","https://openalex.org/W2992727511","https://openalex.org/W3005990752","https://openalex.org/W3011248981","https://openalex.org/W3081111652","https://openalex.org/W3099762987","https://openalex.org/W3103400026","https://openalex.org/W3131137184","https://openalex.org/W3164929383","https://openalex.org/W3173185726","https://openalex.org/W3174786754","https://openalex.org/W3204675283","https://openalex.org/W3210814098","https://openalex.org/W4200452998","https://openalex.org/W4282975603","https://openalex.org/W4292363360","https://openalex.org/W6766325992"],"related_works":["https://openalex.org/W1974923383","https://openalex.org/W2475524688","https://openalex.org/W2739740241","https://openalex.org/W2085105049","https://openalex.org/W2592417500","https://openalex.org/W2526069705","https://openalex.org/W2024016913","https://openalex.org/W2019153376","https://openalex.org/W2981664121","https://openalex.org/W2022666014"],"abstract_inverted_index":{"This":[0],"study":[1],"presents":[2],"the":[3,23,41,49,57,71,81,85,102,165,173],"first":[4,24],"constrained":[5],"sparse":[6,77,144],"tensor":[7,38,43],"factorization":[8],"(cSTF)":[9],"framework":[10,132,147],"that":[11,63,112],"optimizes":[12],"and":[13,22,115,123,158,168],"fully":[14],"offloads":[15],"computation":[16,122],"to":[17,34,119,138],"massively":[18],"parallel":[19],"GPU":[20,30,86],"architectures,":[21],"performance":[25,51,136],"characterization":[26],"of":[27,56,152],"cSTF":[28,58],"on":[29,37,60,84,126,164,178],"architectures.":[31],"In":[32],"contrast":[33],"prior":[35,139],"work":[36],"factorization,":[39],"where":[40],"matricized":[42],"times":[44],"Khatri-Rao":[45],"product":[46],"(MTTKRP)":[47],"is":[48],"primary":[50],"bottleneck,":[52],"our":[53,131,146],"systematic":[54],"analysis":[55],"algorithm":[59,111],"GPUs":[61],"reveals":[62],"adding":[64],"constraints":[65],"creates":[66],"an":[67],"additional":[68],"bottleneck":[69],"in":[70],"update":[72,82,103,110],"operation":[73,83],"for":[74],"many":[75],"real-world":[76,143],"tensors.":[78],"While":[79],"executing":[80],"brings":[87],"significant":[88,97],"speedup":[89,151],"over":[90,172],"its":[91],"CPU":[92],"counterpart,":[93],"it":[94],"remains":[95],"a":[96,108,129,179],"bottleneck.":[98],"To":[99],"further":[100],"accelerate":[101],"operation,":[104],"we":[105],"propose":[106],"cuADMM,":[107],"new":[109],"leverages":[113],"algorithmic":[114],"code":[116],"optimization":[117],"strategies":[118],"minimize":[120],"both":[121],"data":[124],"movement":[125],"GPUs.":[127],"As":[128],"result,":[130],"delivers":[133],"significantly":[134],"improved":[135],"compared":[137],"state-of-the-art.":[140],"On":[141],"10":[142],"tensors,":[145],"achieves":[148],"geometric":[149],"mean":[150],"5.1":[153],"\u00d7":[154,160],"(max":[155,161],"41.59":[156],"\u00d7)":[157,163],"7.01":[159],"58.05":[162],"NIVIDA":[166],"A100":[167],"H100":[169],"GPUs,":[170],"respectively,":[171],"state-of-the-art":[174],"SPLATT":[175],"library":[176],"running":[177],"26-core":[180],"Intel":[181],"Ice":[182],"Lake":[183],"Xeon":[184],"CPU.":[185]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
