{"id":"https://openalex.org/W4381748048","doi":"https://doi.org/10.1109/tpds.2023.3288520","title":"A Novel Parallel Algorithm for Sparse Tensor Matrix Chain Multiplication via TCU-Acceleration","display_name":"A Novel Parallel Algorithm for Sparse Tensor Matrix Chain Multiplication via TCU-Acceleration","publication_year":2023,"publication_date":"2023-06-23","ids":{"openalex":"https://openalex.org/W4381748048","doi":"https://doi.org/10.1109/tpds.2023.3288520"},"language":"en","primary_location":{"id":"doi:10.1109/tpds.2023.3288520","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2023.3288520","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100383993","display_name":"Haotian Wang","orcid":"https://orcid.org/0000-0002-0086-6301"},"institutions":[{"id":"https://openalex.org/I16609230","display_name":"Hunan University","ror":"https://ror.org/05htk5m33","country_code":"CN","type":"education","lineage":["https://openalex.org/I16609230"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Haotian Wang","raw_affiliation_strings":["College of Computer Science and Electronic Engineering, Hunan University, Changsha, Hunan, China","National Supercomputing Center in Changsha, Changsha, Hunan, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Electronic Engineering, Hunan University, Changsha, Hunan, China","institution_ids":["https://openalex.org/I16609230"]},{"raw_affiliation_string":"National Supercomputing Center in Changsha, Changsha, Hunan, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016247312","display_name":"Wangdong Yang","orcid":"https://orcid.org/0000-0003-2681-7898"},"institutions":[{"id":"https://openalex.org/I16609230","display_name":"Hunan University","ror":"https://ror.org/05htk5m33","country_code":"CN","type":"education","lineage":["https://openalex.org/I16609230"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wangdong Yang","raw_affiliation_strings":["College of Computer Science and Electronic Engineering, Hunan University, Changsha, Hunan, China","National Supercomputing Center in Changsha, Changsha, Hunan, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Electronic Engineering, Hunan University, Changsha, Hunan, China","institution_ids":["https://openalex.org/I16609230"]},{"raw_affiliation_string":"National Supercomputing Center in Changsha, Changsha, Hunan, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066849666","display_name":"\u6804\u5fb3 \u5b8d\u6238","orcid":"https://orcid.org/0000-0003-4174-1326"},"institutions":[{"id":"https://openalex.org/I16609230","display_name":"Hunan University","ror":"https://ror.org/05htk5m33","country_code":"CN","type":"education","lineage":["https://openalex.org/I16609230"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rong Hu","raw_affiliation_strings":["College of Computer Science and Electronic Engineering, Hunan University, Changsha, Hunan, China","National Supercomputing Center in Changsha, Changsha, Hunan, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Electronic Engineering, Hunan University, Changsha, Hunan, China","institution_ids":["https://openalex.org/I16609230"]},{"raw_affiliation_string":"National Supercomputing Center in Changsha, Changsha, Hunan, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047441166","display_name":"Renqiu Ouyang","orcid":"https://orcid.org/0000-0002-8712-4561"},"institutions":[{"id":"https://openalex.org/I16609230","display_name":"Hunan University","ror":"https://ror.org/05htk5m33","country_code":"CN","type":"education","lineage":["https://openalex.org/I16609230"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Renqiu Ouyang","raw_affiliation_strings":["College of Computer Science and Electronic Engineering, Hunan University, Changsha, Hunan, China","National Supercomputing Center in Changsha, Changsha, Hunan, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Electronic Engineering, Hunan University, Changsha, Hunan, China","institution_ids":["https://openalex.org/I16609230"]},{"raw_affiliation_string":"National Supercomputing Center in Changsha, Changsha, Hunan, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078793726","display_name":"Kenli Li","orcid":"https://orcid.org/0000-0002-2635-7716"},"institutions":[{"id":"https://openalex.org/I16609230","display_name":"Hunan University","ror":"https://ror.org/05htk5m33","country_code":"CN","type":"education","lineage":["https://openalex.org/I16609230"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kenli Li","raw_affiliation_strings":["College of Computer Science and Electronic Engineering, Hunan University, Changsha, China","College of Computer Science and Electronic Engineering, Hunan University, Changsha, Hunan, China","National Supercomputing Center in Changsha, Changsha, Hunan, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Electronic Engineering, Hunan University, Changsha, China","institution_ids":["https://openalex.org/I16609230"]},{"raw_affiliation_string":"College of Computer Science and Electronic Engineering, Hunan University, Changsha, Hunan, China","institution_ids":["https://openalex.org/I16609230"]},{"raw_affiliation_string":"National Supercomputing Center in Changsha, Changsha, Hunan, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5087894632","display_name":"Keqin Li","orcid":"https://orcid.org/0000-0001-5224-4048"},"institutions":[{"id":"https://openalex.org/I16609230","display_name":"Hunan University","ror":"https://ror.org/05htk5m33","country_code":"CN","type":"education","lineage":["https://openalex.org/I16609230"]},{"id":"https://openalex.org/I157455823","display_name":"SUNY New Paltz","ror":"https://ror.org/03j3dv688","country_code":"US","type":"education","lineage":["https://openalex.org/I157455823"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Keqin Li","raw_affiliation_strings":["College of Computer Science and Electronic Engineering, Hunan University, Changsha, China","College of Computer Science and Electronic Engineering, Hunan University, Changsha, Hunan, China","Department of Computer Science, State University of New York, New Paltz, NY, USA"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Electronic Engineering, Hunan University, Changsha, China","institution_ids":["https://openalex.org/I16609230"]},{"raw_affiliation_string":"College of Computer Science and Electronic Engineering, Hunan University, Changsha, Hunan, China","institution_ids":["https://openalex.org/I16609230"]},{"raw_affiliation_string":"Department of Computer Science, State University of New York, New Paltz, NY, USA","institution_ids":["https://openalex.org/I157455823"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100383993"],"corresponding_institution_ids":["https://openalex.org/I16609230"],"apc_list":null,"apc_paid":null,"fwci":3.0374,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.92270531,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"34","issue":"8","first_page":"2419","last_page":"2432"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9914000034332275,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9879999756813049,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.7863767147064209},{"id":"https://openalex.org/keywords/tucker-decomposition","display_name":"Tucker decomposition","score":0.7411929965019226},{"id":"https://openalex.org/keywords/tensor","display_name":"Tensor (intrinsic definition)","score":0.6414602994918823},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6343541741371155},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.5750672221183777},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.5450631976127625},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5188094973564148},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.48889198899269104},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.44642189145088196},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.42244717478752136},{"id":"https://openalex.org/keywords/tensor-decomposition","display_name":"Tensor decomposition","score":0.26567018032073975},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.24727582931518555},{"id":"https://openalex.org/keywords/pure-mathematics","display_name":"Pure mathematics","score":0.10994696617126465}],"concepts":[{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.7863767147064209},{"id":"https://openalex.org/C42704193","wikidata":"https://www.wikidata.org/wiki/Q7851097","display_name":"Tucker decomposition","level":4,"score":0.7411929965019226},{"id":"https://openalex.org/C155281189","wikidata":"https://www.wikidata.org/wiki/Q3518150","display_name":"Tensor (intrinsic definition)","level":2,"score":0.6414602994918823},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6343541741371155},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.5750672221183777},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.5450631976127625},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5188094973564148},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.48889198899269104},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.44642189145088196},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.42244717478752136},{"id":"https://openalex.org/C2986737658","wikidata":"https://www.wikidata.org/wiki/Q30103009","display_name":"Tensor decomposition","level":3,"score":0.26567018032073975},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.24727582931518555},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.10994696617126465},{"id":"https://openalex.org/C84114770","wikidata":"https://www.wikidata.org/wiki/Q46344","display_name":"Quantum","level":2,"score":0.0},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tpds.2023.3288520","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2023.3288520","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure","score":0.4099999964237213}],"awards":[{"id":"https://openalex.org/G4190216169","display_name":null,"funder_award_id":"61872127","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W2024165284","https://openalex.org/W2079487069","https://openalex.org/W2162254032","https://openalex.org/W2523567745","https://openalex.org/W2616934551","https://openalex.org/W2731053425","https://openalex.org/W2741701794","https://openalex.org/W2758691783","https://openalex.org/W2766702586","https://openalex.org/W2885801207","https://openalex.org/W2886162485","https://openalex.org/W2902783593","https://openalex.org/W2927180808","https://openalex.org/W2950289992","https://openalex.org/W2951178714","https://openalex.org/W2954307287","https://openalex.org/W2962779982","https://openalex.org/W2967617495","https://openalex.org/W2971508019","https://openalex.org/W2972487982","https://openalex.org/W2986686333","https://openalex.org/W3016735325","https://openalex.org/W3042016589","https://openalex.org/W3044913359","https://openalex.org/W3099762987","https://openalex.org/W3117765988","https://openalex.org/W3123612515","https://openalex.org/W3130554079","https://openalex.org/W3157209774","https://openalex.org/W3172914438","https://openalex.org/W3173953560","https://openalex.org/W3193212547","https://openalex.org/W3200803154","https://openalex.org/W4220912491","https://openalex.org/W4282975603","https://openalex.org/W4285160918","https://openalex.org/W4287254515","https://openalex.org/W4312191310","https://openalex.org/W6744667189","https://openalex.org/W6765119895","https://openalex.org/W6791844647"],"related_works":["https://openalex.org/W2293771254","https://openalex.org/W4221142455","https://openalex.org/W2030992542","https://openalex.org/W2119413962","https://openalex.org/W3121828480","https://openalex.org/W2914631005","https://openalex.org/W2032786851","https://openalex.org/W2039875226","https://openalex.org/W2952630098","https://openalex.org/W3132517792"],"abstract_inverted_index":{"Analysis":[0],"of":[1,44,60,122,170],"multi-dimensional":[2,16,99],"data,":[3],"especially":[4],"tensor":[5,95,104,131],"decomposition,":[6],"which":[7],"extracts":[8],"latent":[9],"information,":[10],"is":[11,19,40,47],"becoming":[12],"considerably":[13],"popular.":[14],"Although":[15],"sparse":[17,124,172],"data":[18],"typically":[20],"processed":[21],"on":[22,98,106,167],"multi-core":[23],"processors,":[24],"developing":[25],"highly":[26],"optimized":[27],"GPU-based":[28],"<b>Sp</b>":[29],"arse":[30],"<b>T</b>":[31],"ensor":[32],"<b>M</b>":[33,37],"atrix":[34],"<b>C</b>":[35],"hain":[36],"ultiplication":[38],"(SpTMCM)":[39],"challenging.":[41],"The":[42],"purpose":[43],"this":[45],"paper":[46],"to":[48,56,74,110,138,144],"investigate":[49],"a":[50,82,93,129,168],"novel":[51,79,136],"approach":[52,80,88,137],"named":[53],"SpTMCM":[54,61],"and":[55,86,156],"explore":[57],"the":[58,64,77,103,107,112,118,123,140],"discovery":[59],"coupled":[62],"with":[63,134],"emerging":[65],"computing":[66],"core,":[67],"Tensor":[68],"Core":[69],"Unit":[70],"(TCU).":[71],"In":[72],"contrast":[73],"prior":[75],"work,":[76],"proposed":[78],"enables":[81],"uniform":[83],"storage":[84],"format":[85,96],"optimization":[87],"for":[89,154,161],"SpTMCM.":[90],"We":[91],"design":[92],"hybrid":[94],"based":[97],"tiling":[100],"that":[101],"divides":[102],"depending":[105],"tile":[108],"threshold":[109],"address":[111],"inefficient":[113],"memory":[114,141],"accesses":[115],"caused":[116],"by":[117],"irregular":[119],"nonzero":[120],"distribution":[121],"tensor.":[125],"Further,":[126],"we":[127],"develop":[128],"TCU-based":[130],"parallel":[132],"algorithm":[133],"our":[135,147],"increase":[139],"bandwidth.":[142],"Compared":[143],"state-of-the-art":[145],"works,":[146],"method":[148],"achieves":[149],"<inline-formula><tex-math":[150,157],"notation=\"LaTeX\">$1.16\\sim":[151],"24.12\\times$</tex-math></inline-formula>":[152],"speedup":[153,160],"SpMTTKRP":[155],"notation=\"LaTeX\">$5.07\\sim":[158],"7.15\\times$</tex-math></inline-formula>":[159],"SpTTMChain":[162],"across":[163],"NVIDIA":[164],"A100":[165],"GPU":[166],"range":[169],"real-world":[171],"tensors.":[173]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
