{"id":"https://openalex.org/W4226395120","doi":"https://doi.org/10.1145/3524059.3532363","title":"Efficient, out-of-memory sparse MTTKRP on massively parallel architectures","display_name":"Efficient, out-of-memory sparse MTTKRP on massively parallel architectures","publication_year":2022,"publication_date":"2022-06-16","ids":{"openalex":"https://openalex.org/W4226395120","doi":"https://doi.org/10.1145/3524059.3532363"},"language":"en","primary_location":{"id":"doi:10.1145/3524059.3532363","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3524059.3532363","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 36th ACM International Conference on Supercomputing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2201.12523","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5062030862","display_name":"Andy Nguyen","orcid":"https://orcid.org/0000-0002-0207-0323"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Andy Nguyen","raw_affiliation_strings":["University of Oregon"],"affiliations":[{"raw_affiliation_string":"University of Oregon","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090060327","display_name":"Ahmed E. Helal","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ahmed E. Helal","raw_affiliation_strings":["Intel Labs"],"affiliations":[{"raw_affiliation_string":"Intel Labs","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047885427","display_name":"Fabio Checconi","orcid":"https://orcid.org/0000-0002-7279-8459"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Fabio Checconi","raw_affiliation_strings":["Intel Labs"],"affiliations":[{"raw_affiliation_string":"Intel Labs","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031307529","display_name":"Jan Laukemann","orcid":"https://orcid.org/0000-0002-3776-9353"},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Jan Laukemann","raw_affiliation_strings":["University of Erlangen-N\u00fcrnberg"],"affiliations":[{"raw_affiliation_string":"University of Erlangen-N\u00fcrnberg","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026593207","display_name":"Jesmin Jahan Tithi","orcid":"https://orcid.org/0000-0002-2861-8393"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jesmin Jahan Tithi","raw_affiliation_strings":["Intel Labs"],"affiliations":[{"raw_affiliation_string":"Intel Labs","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065069377","display_name":"Yongseok Soh","orcid":"https://orcid.org/0009-0000-8358-4993"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yongseok Soh","raw_affiliation_strings":["University of Oregon"],"affiliations":[{"raw_affiliation_string":"University of Oregon","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049815529","display_name":"Teresa Ranadive","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Teresa Ranadive","raw_affiliation_strings":["Laboratory for Physical Sciences"],"affiliations":[{"raw_affiliation_string":"Laboratory for Physical Sciences","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066195314","display_name":"Fabrizio Petrini","orcid":"https://orcid.org/0000-0002-4977-7107"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Fabrizio Petrini","raw_affiliation_strings":["Intel Labs"],"affiliations":[{"raw_affiliation_string":"Intel Labs","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5018257269","display_name":"Jee Woong Choi","orcid":"https://orcid.org/0000-0003-3330-9146"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jee W. Choi","raw_affiliation_strings":["University of Oregon"],"affiliations":[{"raw_affiliation_string":"University of Oregon","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5062030862"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.18646454,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"13"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13497","display_name":"Hermeneutics and Narrative Identity","score":0.9879000186920166,"subfield":{"id":"https://openalex.org/subfields/1211","display_name":"Philosophy"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T13497","display_name":"Hermeneutics and Narrative Identity","score":0.9879000186920166,"subfield":{"id":"https://openalex.org/subfields/1211","display_name":"Philosophy"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13695","display_name":"Aging, Elder Care, and Social Issues","score":0.9749000072479248,"subfield":{"id":"https://openalex.org/subfields/3600","display_name":"General Health Professions"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T13099","display_name":"Health, Medicine and Society","score":0.95660001039505,"subfield":{"id":"https://openalex.org/subfields/3600","display_name":"General Health Professions"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8285789489746094},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.7937140464782715},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7245000004768372},{"id":"https://openalex.org/keywords/massively-parallel","display_name":"Massively parallel","score":0.6085777282714844},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.5356822609901428},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.5311720967292786},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.47204455733299255},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.4557911157608032},{"id":"https://openalex.org/keywords/tensor","display_name":"Tensor (intrinsic definition)","score":0.43561336398124695},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.4316389560699463},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.33299553394317627}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8285789489746094},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.7937140464782715},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7245000004768372},{"id":"https://openalex.org/C190475519","wikidata":"https://www.wikidata.org/wiki/Q544384","display_name":"Massively parallel","level":2,"score":0.6085777282714844},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.5356822609901428},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.5311720967292786},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.47204455733299255},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.4557911157608032},{"id":"https://openalex.org/C155281189","wikidata":"https://www.wikidata.org/wiki/Q3518150","display_name":"Tensor (intrinsic definition)","level":2,"score":0.43561336398124695},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.4316389560699463},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.33299553394317627},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3524059.3532363","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3524059.3532363","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 36th ACM International Conference on Supercomputing","raw_type":"proceedings-article"},{"id":"doi:10.48550/arxiv.2201.12523","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2201.12523","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2201.12523","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2201.12523","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article-journal"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W1511885491","https://openalex.org/W1574504252","https://openalex.org/W1964704819","https://openalex.org/W1969116741","https://openalex.org/W1982825626","https://openalex.org/W2024165284","https://openalex.org/W2079487069","https://openalex.org/W2103392911","https://openalex.org/W2108138101","https://openalex.org/W2112185810","https://openalex.org/W2119511382","https://openalex.org/W2128853364","https://openalex.org/W2159314924","https://openalex.org/W2162283062","https://openalex.org/W2163687928","https://openalex.org/W2245094585","https://openalex.org/W2262232454","https://openalex.org/W2481767403","https://openalex.org/W2528907418","https://openalex.org/W2557625547","https://openalex.org/W2590246587","https://openalex.org/W2616934551","https://openalex.org/W2624446503","https://openalex.org/W2788264912","https://openalex.org/W2885801207","https://openalex.org/W2885990458","https://openalex.org/W2893530532","https://openalex.org/W2902783593","https://openalex.org/W2946068081","https://openalex.org/W2951178714","https://openalex.org/W2952598959","https://openalex.org/W2963768428","https://openalex.org/W2971508019","https://openalex.org/W2986686333","https://openalex.org/W3108012228","https://openalex.org/W3131137184","https://openalex.org/W3168654039"],"related_works":["https://openalex.org/W720048","https://openalex.org/W9401246","https://openalex.org/W4425985","https://openalex.org/W9048986","https://openalex.org/W11539422","https://openalex.org/W11061678","https://openalex.org/W16978306","https://openalex.org/W13297959","https://openalex.org/W10948162","https://openalex.org/W1978869"],"abstract_inverted_index":{"Tensor":[0],"decomposition":[1],"(TD)":[2],"is":[3,152],"an":[4,102],"important":[5],"method":[6],"for":[7,22],"extracting":[8],"latent":[9],"information":[10,129],"from":[11],"high-dimensional":[12],"(multi-modal)":[13],"sparse":[14,185,193],"data.":[15],"This":[16],"study":[17],"presents":[18],"a":[19,52,58,139,189],"novel":[20],"framework":[21,142,155],"accelerating":[23],"fundamental":[24],"TD":[25],"operations":[26],"on":[27,57,98,114,188],"massively":[28],"parallel":[29],"GPU":[30,75],"architectures.":[31],"In":[32],"contrast":[33],"to":[34,117,148,177],"prior":[35,149],"work,":[36],"the":[37,71,94,153,162,181],"proposed":[38],"Blocked":[39],"Linearized":[40],"Coordinate":[41],"(BLCO)":[42],"format":[43],"enables":[44],"efficient":[45],"out-of-memory":[46,159],"computation":[47],"of":[48,74,112,157,191],"tensor":[49,60],"algorithms":[50],"using":[51],"unified":[53],"implementation":[54],"that":[55],"works":[56],"single":[59],"copy.":[61],"Our":[62],"adaptive":[63],"blocking":[64],"and":[65,84,87,119,151,165],"linearization":[66],"strategies":[67],"not":[68],"only":[69,154],"meet":[70],"resource":[72],"constraints":[73],"devices,":[76],"but":[77],"also":[78],"accelerate":[79],"data":[80],"indexing,":[81],"eliminate":[82],"control-flow":[83],"memory-access":[85],"irregularities,":[86],"reduce":[88],"kernel":[89],"launching":[90],"overhead.":[91],"To":[92],"address":[93],"substantial":[95],"synchronization":[96],"cost":[97],"GPUs,":[99,167],"we":[100],"introduce":[101],"opportunistic":[103],"conflict":[104],"resolution":[105],"algorithm,":[106],"in":[107,134],"which":[108],"threads":[109],"collaborate":[110],"instead":[111],"contending":[113],"memory":[115],"access":[116],"discover":[118],"resolve":[120],"their":[121],"conflicting":[122],"updates":[123],"on-the-fly,":[124],"without":[125],"keeping":[126],"any":[127],"auxiliary":[128],"or":[130],"storing":[131],"non-zero":[132],"elements":[133],"specific":[135],"mode":[136],"orientations.":[137],"As":[138],"result,":[140],"our":[141],"delivers":[143],"superior":[144],"in-memory":[145],"performance":[146],"compared":[147],"state-of-the-art,":[150],"capable":[156],"processing":[158],"tensors.":[160,194],"On":[161],"latest":[163],"Intel":[164],"NVIDIA":[166],"BLCO":[168],"achieves":[169],"2.12":[170],"--":[171],"2.6X":[172],"geometric-mean":[173],"speedup":[174],"(with":[175],"up":[176],"33.35X":[178],"speedup)":[179],"over":[180],"state-of-the-art":[182],"mixed-mode":[183],"compressed":[184],"fiber":[186],"(MM-CSF)":[187],"range":[190],"real-world":[192]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2022-05-05T00:00:00"}
