{"id":"https://openalex.org/W4312727772","doi":"https://doi.org/10.1109/tc.2022.3222955","title":"High-Performance Tensor Learning Primitives Using GPU Tensor Cores","display_name":"High-Performance Tensor Learning Primitives Using GPU Tensor Cores","publication_year":2022,"publication_date":"2022-12-15","ids":{"openalex":"https://openalex.org/W4312727772","doi":"https://doi.org/10.1109/tc.2022.3222955"},"language":"en","primary_location":{"id":"doi:10.1109/tc.2022.3222955","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tc.2022.3222955","pdf_url":null,"source":{"id":"https://openalex.org/S157670870","display_name":"IEEE Transactions on Computers","issn_l":"0018-9340","issn":["0018-9340","1557-9956","2326-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computers","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100405233","display_name":"Xiao-Yang Liu","orcid":"https://orcid.org/0000-0002-9532-1709"},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Xiao-Yang Liu","raw_affiliation_strings":["Department of Electrical Engineering, Columbia University, New York, NY, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, Columbia University, New York, NY, USA","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059031640","display_name":"Zeliang Zhang","orcid":"https://orcid.org/0000-0002-3890-5388"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zeliang Zhang","raw_affiliation_strings":["School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100462290","display_name":"Zhiyuan Wang","orcid":"https://orcid.org/0000-0002-3884-2487"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiyuan Wang","raw_affiliation_strings":["School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101891641","display_name":"Lu Han","orcid":"https://orcid.org/0009-0008-6614-4388"},"institutions":[{"id":"https://openalex.org/I141962983","display_name":"Shanghai University of Engineering Science","ror":"https://ror.org/0557b9y08","country_code":"CN","type":"education","lineage":["https://openalex.org/I141962983"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Han Lu","raw_affiliation_strings":["School of Computer Engineering and Science, Shanghai University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Engineering and Science, Shanghai University, Shanghai, China","institution_ids":["https://openalex.org/I141962983"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100382658","display_name":"Xiaodong Wang","orcid":"https://orcid.org/0000-0002-2945-9240"},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiaodong Wang","raw_affiliation_strings":["Department of Electrical Engineering, Columbia University, New York, NY, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, Columbia University, New York, NY, USA","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066875580","display_name":"Anwar Walid","orcid":"https://orcid.org/0000-0003-1992-6068"},"institutions":[{"id":"https://openalex.org/I72090969","display_name":"Nokia (United States)","ror":"https://ror.org/038km2573","country_code":"US","type":"company","lineage":["https://openalex.org/I2738502077","https://openalex.org/I72090969"]},{"id":"https://openalex.org/I196272386","display_name":"Providence College","ror":"https://ror.org/00rxpqe74","country_code":"US","type":"education","lineage":["https://openalex.org/I196272386"]},{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anwar Walid","raw_affiliation_strings":["Columbia University, New York, NY, USA","Nokia-Bell Labs, New Providence, NJ, USA"],"affiliations":[{"raw_affiliation_string":"Columbia University, New York, NY, USA","institution_ids":["https://openalex.org/I78577930"]},{"raw_affiliation_string":"Nokia-Bell Labs, New Providence, NJ, USA","institution_ids":["https://openalex.org/I196272386","https://openalex.org/I72090969"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100405233"],"corresponding_institution_ids":["https://openalex.org/I78577930"],"apc_list":null,"apc_paid":null,"fwci":1.3384,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.80196802,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":"72","issue":"6","first_page":"1733","last_page":"1746"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9641000032424927,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/tensor","display_name":"Tensor (intrinsic definition)","score":0.8229274153709412},{"id":"https://openalex.org/keywords/tucker-decomposition","display_name":"Tucker decomposition","score":0.6899661421775818},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.597926139831543},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5255167484283447},{"id":"https://openalex.org/keywords/notation","display_name":"Notation","score":0.4508315622806549},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3895258903503418},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3401625454425812},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3375725746154785},{"id":"https://openalex.org/keywords/tensor-decomposition","display_name":"Tensor decomposition","score":0.30347129702568054},{"id":"https://openalex.org/keywords/arithmetic","display_name":"Arithmetic","score":0.20758184790611267},{"id":"https://openalex.org/keywords/geometry","display_name":"Geometry","score":0.14431926608085632}],"concepts":[{"id":"https://openalex.org/C155281189","wikidata":"https://www.wikidata.org/wiki/Q3518150","display_name":"Tensor (intrinsic definition)","level":2,"score":0.8229274153709412},{"id":"https://openalex.org/C42704193","wikidata":"https://www.wikidata.org/wiki/Q7851097","display_name":"Tucker decomposition","level":4,"score":0.6899661421775818},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.597926139831543},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5255167484283447},{"id":"https://openalex.org/C45357846","wikidata":"https://www.wikidata.org/wiki/Q2001982","display_name":"Notation","level":2,"score":0.4508315622806549},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3895258903503418},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3401625454425812},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3375725746154785},{"id":"https://openalex.org/C2986737658","wikidata":"https://www.wikidata.org/wiki/Q30103009","display_name":"Tensor decomposition","level":3,"score":0.30347129702568054},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.20758184790611267},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.14431926608085632}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tc.2022.3222955","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tc.2022.3222955","pdf_url":null,"source":{"id":"https://openalex.org/S157670870","display_name":"IEEE Transactions on Computers","issn_l":"0018-9340","issn":["0018-9340","1557-9956","2326-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computers","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":55,"referenced_works":["https://openalex.org/W1798945469","https://openalex.org/W1988888548","https://openalex.org/W2000215628","https://openalex.org/W2013912476","https://openalex.org/W2018838463","https://openalex.org/W2022745729","https://openalex.org/W2040230751","https://openalex.org/W2071000534","https://openalex.org/W2295543477","https://openalex.org/W2471966094","https://openalex.org/W2482092413","https://openalex.org/W2544822491","https://openalex.org/W2808671516","https://openalex.org/W2886162485","https://openalex.org/W2888987473","https://openalex.org/W2938943977","https://openalex.org/W2951293558","https://openalex.org/W2952113380","https://openalex.org/W2957577340","https://openalex.org/W2963137752","https://openalex.org/W2970400322","https://openalex.org/W2972487982","https://openalex.org/W2998004380","https://openalex.org/W3008803005","https://openalex.org/W3009762362","https://openalex.org/W3011383353","https://openalex.org/W3037045207","https://openalex.org/W3047788099","https://openalex.org/W3084371909","https://openalex.org/W3091170309","https://openalex.org/W3093047118","https://openalex.org/W3101321165","https://openalex.org/W3130554079","https://openalex.org/W3132532188","https://openalex.org/W3137884659","https://openalex.org/W3138335809","https://openalex.org/W3140077234","https://openalex.org/W3176934054","https://openalex.org/W3204376621","https://openalex.org/W3205226039","https://openalex.org/W4205613022","https://openalex.org/W4205947740","https://openalex.org/W4241741271","https://openalex.org/W4285160918","https://openalex.org/W4287208846","https://openalex.org/W4295312788","https://openalex.org/W4297797495","https://openalex.org/W6638005537","https://openalex.org/W6638060716","https://openalex.org/W6697330132","https://openalex.org/W6729203000","https://openalex.org/W6766978945","https://openalex.org/W6792188503","https://openalex.org/W6794457956","https://openalex.org/W6806119152"],"related_works":["https://openalex.org/W2891735857","https://openalex.org/W4214526161","https://openalex.org/W47805180","https://openalex.org/W2093953080","https://openalex.org/W2564982703","https://openalex.org/W3127610061","https://openalex.org/W2891277085","https://openalex.org/W2347172331","https://openalex.org/W3216281372","https://openalex.org/W4281643854"],"abstract_inverted_index":{"Tensor":[0],"learning":[1,21,51,61,72,88],"is":[2],"a":[3,112,134,220,239,246],"powerful":[4],"tool":[5],"for":[6,70,79,100,143,177,207],"big":[7,80],"data":[8,81],"analytics":[9],"and":[10,16,28,57,96,110,139,149,166,170,173,187,196,245],"machine":[11],"learning,":[12],"e.g.,":[13],"gene":[14,101,208],"analysis":[15,209],"deep":[17,119],"learning.":[18],"However,":[19],"tensor":[20,50,55,60,71,76,87,94,108,115,159,181,189,205,228],"algorithms":[22],"are":[23,141],"compute-intensive":[24],"since":[25],"their":[26,40],"time":[27],"space":[29],"complexities":[30],"grow":[31],"exponentially":[32],"with":[33,147,219],"the":[34,47,85,92,106,128,201,253],"order":[35],"of":[36,49,235,241,249,255],"tensors,":[37],"which":[38,131],"hinders":[39],"application.":[41],"In":[42],"this":[43],"paper,":[44],"we":[45,64,83,104],"exploit":[46],"parallelism":[48],"primitives":[52,73,89],"using":[53],"GPU":[54,75,144],"cores":[56],"develop":[58],"high-performance":[59],"algorithms.":[62],"First,":[63],"propose":[65,111],"novel":[66,113],"hardware-oriented":[67],"optimization":[68],"strategies":[69],"on":[74,152],"cores.":[77],"Second,":[78],"analytics,":[82],"employ":[84,123],"optimized":[86],"to":[90,117,126,163,193,212],"accelerate":[91],"CP":[93,158,186,204],"decomposition":[95,109,160,206],"then":[97],"apply":[98],"it":[99],"analysis.":[102],"Third,":[103],"optimize":[105],"Tucker":[107,114,180,188,227],"layer":[116,229],"compress":[118],"neural":[120,129,224,230],"networks.":[121],"We":[122],"natural":[124],"gradients":[125],"train":[127],"networks,":[130],"only":[132],"involve":[133],"forward":[135],"pass":[136],"without":[137],"backpropagation":[138],"thus":[140],"suitable":[142],"computations.":[145],"Compared":[146,218],"TensorLab":[148],"TensorLy":[150],"libraries":[151],"an":[153,233],"A100":[154],"GPU,":[155],"our":[156,178,226],"third-order":[157,179],"achieves":[161,210,232],"up":[162,192,211],"<inline-formula><tex-math":[164,167,171,174,194,197,213,236,242,250,256],"notation=\"LaTeX\">$16.32\\times$</tex-math></inline-formula>":[165],"notation=\"LaTeX\">$32.25\\times$</tex-math></inline-formula>":[168],"speedups;":[169],"notation=\"LaTeX\">$6.09\\times$</tex-math></inline-formula>":[172],"notation=\"LaTeX\">$6.72\\times$</tex-math></inline-formula>":[175],"speedups":[176,199],"decomposition.":[182],"The":[183],"proposed":[184],"fourth-order":[185],"decompositions":[190],"achieve":[191],"notation=\"LaTeX\">$30.65\\times$</tex-math></inline-formula>":[195],"notation=\"LaTeX\">$5.41\\times$</tex-math></inline-formula>":[198],"over":[200,216],"TensorLab.":[202],"Our":[203],"notation=\"LaTeX\">$5.88\\times$</tex-math></inline-formula>":[214],"speedup":[215,240],"TensorLy.":[217],"conventional":[221],"fully":[222],"connected":[223],"network,":[225],"network":[231],"accuracy":[234],"notation=\"LaTeX\">$97.9\\%$</tex-math></inline-formula>":[237],",":[238,244],"notation=\"LaTeX\">$4.47\\times$</tex-math></inline-formula>":[243],"compression":[247],"ratio":[248],"notation=\"LaTeX\">$2.92$</tex-math></inline-formula>":[251],"at":[252],"cost":[254],"notation=\"LaTeX\">$0.4\\%$</tex-math></inline-formula>":[257],"drop":[258],"in":[259],"accuracy.":[260]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":6}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
