{"id":"https://openalex.org/W4282975603","doi":"https://doi.org/10.1145/3524059.3532363","title":"Efficient, Out-of-Memory Sparse MTTKRP on Massively Parallel Architectures","display_name":"Efficient, Out-of-Memory Sparse MTTKRP on Massively Parallel Architectures","publication_year":2022,"publication_date":"2022-01-29","ids":{"openalex":"https://openalex.org/W4282975603","doi":"https://doi.org/10.1145/3524059.3532363"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2201.12523","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2201.12523","pdf_url":"https://arxiv.org/pdf/2201.12523","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2201.12523","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112879622","display_name":"Andy Nguyen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nguyen, Andy","raw_affiliation_strings":["University of Oregon"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Oregon","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090060327","display_name":"Ahmed E. Helal","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Helal, Ahmed E.","raw_affiliation_strings":["Intel Labs"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Intel Labs","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047885427","display_name":"Fabio Checconi","orcid":"https://orcid.org/0000-0002-7279-8459"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Checconi, Fabio","raw_affiliation_strings":["Intel Labs"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Intel Labs","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031307529","display_name":"Jan Laukemann","orcid":"https://orcid.org/0000-0002-3776-9353"},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Laukemann, Jan","raw_affiliation_strings":["University of Erlangen-N\u00fcrnberg"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Erlangen-N\u00fcrnberg","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026593207","display_name":"Jesmin Jahan Tithi","orcid":"https://orcid.org/0000-0002-2861-8393"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tithi, Jesmin Jahan","raw_affiliation_strings":["Intel Labs"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Intel Labs","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065069377","display_name":"Yongseok Soh","orcid":"https://orcid.org/0009-0000-8358-4993"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Soh, Yongseok","raw_affiliation_strings":["University of Oregon"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Oregon","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049815529","display_name":"Teresa Ranadive","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ranadive, Teresa","raw_affiliation_strings":["Laboratory for Physical Sciences"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Laboratory for Physical Sciences","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066195314","display_name":"Fabrizio Petrini","orcid":"https://orcid.org/0000-0002-4977-7107"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Petrini, Fabrizio","raw_affiliation_strings":["Intel Labs"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Intel Labs","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5090972683","display_name":"Jee Choi","orcid":"https://orcid.org/0000-0002-6938-8221"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Choi, Jee W.","raw_affiliation_strings":["University of Oregon"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Oregon","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.7701,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.84753902,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9886999726295471,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9487000107765198,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8487366437911987},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.7705976366996765},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7144961357116699},{"id":"https://openalex.org/keywords/massively-parallel","display_name":"Massively parallel","score":0.5892314910888672},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.5197912454605103},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.5126256346702576},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.4794844090938568},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.4444807171821594},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.4443321228027344},{"id":"https://openalex.org/keywords/distributed-memory","display_name":"Distributed memory","score":0.4287060797214508},{"id":"https://openalex.org/keywords/shared-memory","display_name":"Shared memory","score":0.4106111228466034}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8487366437911987},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.7705976366996765},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7144961357116699},{"id":"https://openalex.org/C190475519","wikidata":"https://www.wikidata.org/wiki/Q544384","display_name":"Massively parallel","level":2,"score":0.5892314910888672},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.5197912454605103},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.5126256346702576},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.4794844090938568},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.4444807171821594},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.4443321228027344},{"id":"https://openalex.org/C91481028","wikidata":"https://www.wikidata.org/wiki/Q1054686","display_name":"Distributed memory","level":3,"score":0.4287060797214508},{"id":"https://openalex.org/C133875982","wikidata":"https://www.wikidata.org/wiki/Q764810","display_name":"Shared memory","level":2,"score":0.4106111228466034},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"pmh:oai:arXiv.org:2201.12523","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2201.12523","pdf_url":"https://arxiv.org/pdf/2201.12523","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2201.12523","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2201.12523","pdf_url":"https://arxiv.org/pdf/2201.12523","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W1511885491","https://openalex.org/W1574504252","https://openalex.org/W1964704819","https://openalex.org/W1969116741","https://openalex.org/W1982825626","https://openalex.org/W2024165284","https://openalex.org/W2079487069","https://openalex.org/W2103392911","https://openalex.org/W2108138101","https://openalex.org/W2112185810","https://openalex.org/W2113841249","https://openalex.org/W2119511382","https://openalex.org/W2128853364","https://openalex.org/W2159314924","https://openalex.org/W2162283062","https://openalex.org/W2163687928","https://openalex.org/W2245094585","https://openalex.org/W2262232454","https://openalex.org/W2481767403","https://openalex.org/W2528907418","https://openalex.org/W2557625547","https://openalex.org/W2590246587","https://openalex.org/W2624446503","https://openalex.org/W2788264912","https://openalex.org/W2796649226","https://openalex.org/W2885801207","https://openalex.org/W2885990458","https://openalex.org/W2893530532","https://openalex.org/W2902783593","https://openalex.org/W2946068081","https://openalex.org/W2951178714","https://openalex.org/W2952598959","https://openalex.org/W2963768428","https://openalex.org/W2971508019","https://openalex.org/W2980728592","https://openalex.org/W2986686333","https://openalex.org/W3094876077","https://openalex.org/W3099762987","https://openalex.org/W3101742238","https://openalex.org/W3108012228","https://openalex.org/W3164929383","https://openalex.org/W3168654039","https://openalex.org/W4233475787"],"related_works":["https://openalex.org/W2026512611","https://openalex.org/W1985165680","https://openalex.org/W4245497162","https://openalex.org/W2353146130","https://openalex.org/W1990817968","https://openalex.org/W2150064838","https://openalex.org/W1933089384","https://openalex.org/W2185094550","https://openalex.org/W2088823210","https://openalex.org/W2135766592"],"abstract_inverted_index":{"Tensor":[0],"decomposition":[1],"(TD)":[2],"is":[3,152],"an":[4,102],"important":[5],"method":[6],"for":[7,22],"extracting":[8],"latent":[9],"information":[10,129],"from":[11],"high-dimensional":[12],"(multi-modal)":[13],"sparse":[14,183,191],"data.":[15],"This":[16],"study":[17],"presents":[18],"a":[19,52,58,139,187],"novel":[20],"framework":[21,142,155],"accelerating":[23],"fundamental":[24],"TD":[25],"operations":[26],"on":[27,57,98,114,186],"massively":[28],"parallel":[29],"GPU":[30,75],"architectures.":[31],"In":[32],"contrast":[33],"to":[34,117,148,175],"prior":[35,149],"work,":[36],"the":[37,71,94,153,162,179],"proposed":[38],"Blocked":[39],"Linearized":[40],"Coordinate":[41],"(BLCO)":[42],"format":[43],"enables":[44],"efficient":[45],"out-of-memory":[46,159],"computation":[47],"of":[48,74,112,157,189],"tensor":[49,60],"algorithms":[50],"using":[51],"unified":[53],"implementation":[54],"that":[55],"works":[56],"single":[59],"copy.":[61],"Our":[62],"adaptive":[63],"blocking":[64],"and":[65,84,87,119,151,165],"linearization":[66],"strategies":[67],"not":[68],"only":[69,154],"meet":[70],"resource":[72],"constraints":[73],"devices,":[76],"but":[77],"also":[78],"accelerate":[79],"data":[80],"indexing,":[81],"eliminate":[82],"control-flow":[83],"memory-access":[85],"irregularities,":[86],"reduce":[88],"kernel":[89],"launching":[90],"overhead.":[91],"To":[92],"address":[93],"substantial":[95],"synchronization":[96],"cost":[97],"GPUs,":[99,167],"we":[100],"introduce":[101],"opportunistic":[103],"conflict":[104],"resolution":[105],"algorithm,":[106],"in":[107,134],"which":[108],"threads":[109],"collaborate":[110],"instead":[111],"contending":[113],"memory":[115],"access":[116],"discover":[118],"resolve":[120],"their":[121],"conflicting":[122],"updates":[123],"on-the-fly,":[124],"without":[125],"keeping":[126],"any":[127],"auxiliary":[128],"or":[130],"storing":[131],"non-zero":[132],"elements":[133],"specific":[135],"mode":[136],"orientations.":[137],"As":[138],"result,":[140],"our":[141],"delivers":[143],"superior":[144],"in-memory":[145],"performance":[146],"compared":[147],"state-of-the-art,":[150],"capable":[156],"processing":[158],"tensors.":[160,192],"On":[161],"latest":[163],"Intel":[164],"NVIDIA":[166],"BLCO":[168],"achieves":[169],"2.12-2.6X":[170],"geometric-mean":[171],"speedup":[172],"(with":[173],"up":[174],"33.35X":[176],"speedup)":[177],"over":[178],"state-of-the-art":[180],"mixed-mode":[181],"compressed":[182],"fiber":[184],"(MM-CSF)":[185],"range":[188],"real-world":[190]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2022-06-17T00:00:00"}
