{"id":"https://openalex.org/W4319870531","doi":"https://doi.org/10.1145/3543622.3573179","title":"Accelerating Sparse MTTKRP for Tensor Decomposition on FPGA","display_name":"Accelerating Sparse MTTKRP for Tensor Decomposition on FPGA","publication_year":2023,"publication_date":"2023-02-10","ids":{"openalex":"https://openalex.org/W4319870531","doi":"https://doi.org/10.1145/3543622.3573179"},"language":"en","primary_location":{"id":"doi:10.1145/3543622.3573179","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3543622.3573179","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3543622.3573179","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2023 ACM/SIGDA International Symposium on Field Programmable Gate Arrays","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3543622.3573179","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036985229","display_name":"Sasindu Wijeratne","orcid":"https://orcid.org/0000-0002-5538-2988"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sasindu Wijeratne","raw_affiliation_strings":["University of Southern California, Los Angeles, CA, USA"],"raw_orcid":"https://orcid.org/0000-0002-5538-2988","affiliations":[{"raw_affiliation_string":"University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022009228","display_name":"Ta-Yang Wang","orcid":"https://orcid.org/0000-0003-4445-8710"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ta-Yang Wang","raw_affiliation_strings":["University of Southern California, Los Angeles, CA, USA"],"raw_orcid":"https://orcid.org/0000-0003-4445-8710","affiliations":[{"raw_affiliation_string":"University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042560222","display_name":"Rajgopal Kannan","orcid":"https://orcid.org/0000-0001-8736-3012"},"institutions":[{"id":"https://openalex.org/I2802705668","display_name":"United States Army Combat Capabilities Development Command","ror":"https://ror.org/02rdkx920","country_code":"US","type":"other","lineage":["https://openalex.org/I1304082316","https://openalex.org/I1330347796","https://openalex.org/I2802705668","https://openalex.org/I4210154437"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rajgopal Kannan","raw_affiliation_strings":["DEVCOM US Army Research Lab, Los Angeles, CA, USA"],"raw_orcid":"https://orcid.org/0000-0001-8736-3012","affiliations":[{"raw_affiliation_string":"DEVCOM US Army Research Lab, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I2802705668"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033166029","display_name":"Viktor K. Prasanna","orcid":"https://orcid.org/0000-0002-1609-8589"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Viktor Prasanna","raw_affiliation_strings":["University of Southern California, Los Angeles, CA, USA"],"raw_orcid":"https://orcid.org/0000-0002-1609-8589","affiliations":[{"raw_affiliation_string":"University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.9957,"has_fulltext":true,"cited_by_count":10,"citation_normalized_percentile":{"value":0.86817102,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"259","last_page":"269"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11304","display_name":"Advanced Neuroimaging Techniques and Applications","score":0.9412999749183655,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8090002536773682},{"id":"https://openalex.org/keywords/tensor","display_name":"Tensor (intrinsic definition)","score":0.6815247535705566},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6692157983779907},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6038807034492493},{"id":"https://openalex.org/keywords/tensor-contraction","display_name":"Tensor contraction","score":0.5763338208198547},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.5689302086830139},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.5316151976585388},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5116875171661377},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.5026330947875977},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.40322810411453247},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.368864506483078},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.2648228406906128},{"id":"https://openalex.org/keywords/tensor-product","display_name":"Tensor product","score":0.20328783988952637},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.133404940366745}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8090002536773682},{"id":"https://openalex.org/C155281189","wikidata":"https://www.wikidata.org/wiki/Q3518150","display_name":"Tensor (intrinsic definition)","level":2,"score":0.6815247535705566},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6692157983779907},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6038807034492493},{"id":"https://openalex.org/C124007464","wikidata":"https://www.wikidata.org/wiki/Q428091","display_name":"Tensor contraction","level":3,"score":0.5763338208198547},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.5689302086830139},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.5316151976585388},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5116875171661377},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.5026330947875977},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.40322810411453247},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.368864506483078},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.2648228406906128},{"id":"https://openalex.org/C51255310","wikidata":"https://www.wikidata.org/wiki/Q1163016","display_name":"Tensor product","level":2,"score":0.20328783988952637},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.133404940366745},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3543622.3573179","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3543622.3573179","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3543622.3573179","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2023 ACM/SIGDA International Symposium on Field Programmable Gate Arrays","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3543622.3573179","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3543622.3573179","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3543622.3573179","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2023 ACM/SIGDA International Symposium on Field Programmable Gate Arrays","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Climate action","score":0.5299999713897705,"id":"https://metadata.un.org/sdg/13"}],"awards":[{"id":"https://openalex.org/G3757289616","display_name":null,"funder_award_id":"2009057","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6077084736","display_name":null,"funder_award_id":"OAC-2209563","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6237556090","display_name":null,"funder_award_id":"CNS-2009057","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G805653150","display_name":"OAC Core: Scalable Graph ML on Distributed Heterogeneous Systems","funder_award_id":"2209563","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320337807","display_name":"U.S. Army Combat Capabilities Development Command","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4319870531.pdf","grobid_xml":"https://content.openalex.org/works/W4319870531.grobid-xml"},"referenced_works_count":17,"referenced_works":["https://openalex.org/W1511885491","https://openalex.org/W2079487069","https://openalex.org/W2104680817","https://openalex.org/W2469230926","https://openalex.org/W2767274246","https://openalex.org/W2902783593","https://openalex.org/W2951178714","https://openalex.org/W2971508019","https://openalex.org/W2986686333","https://openalex.org/W3005990752","https://openalex.org/W3015509582","https://openalex.org/W3015641590","https://openalex.org/W3016735325","https://openalex.org/W3103400026","https://openalex.org/W3131137184","https://openalex.org/W3164929383","https://openalex.org/W3200075413"],"related_works":["https://openalex.org/W1887380982","https://openalex.org/W2385280846","https://openalex.org/W2060415987","https://openalex.org/W2140205332","https://openalex.org/W2519254393","https://openalex.org/W4390617408","https://openalex.org/W4319065663","https://openalex.org/W4353081986","https://openalex.org/W2347719368","https://openalex.org/W2090507910"],"abstract_inverted_index":{"Sparse":[0],"Matricized":[1],"Tensor":[2],"Times":[3],"Khatri-Rao":[4],"Product":[5],"(spMTTKRP)":[6],"is":[7],"the":[8,29,50,56,61,67,72,78,97,109,116,120,147,156,162,167,185],"most":[9],"computationally":[10],"intensive":[11],"kernel":[12],"in":[13,180],"sparse":[14,194],"tensor":[15,46,73,99,110,149,195],"decomposition.":[16],"In":[17],"this":[18],"paper,":[19],"we":[20],"propose":[21,86],"a":[22,44,87,126,136,172],"hardware-algorithm":[23],"co-design":[24],"on":[25,191],"FPGA":[26,57,128],"to":[27,55,114,154],"minimize":[28],"execution":[30,181],"time":[31,182],"of":[32,37,52,63,71,96,101,119,135,138,146,161,166,175],"spMTTKRP":[33,64],"along":[34,65],"all":[35,66],"modes":[36],"an":[38],"input":[39,98,148],"tensor.":[40],"We":[41,85,124],"introduce":[42],"FLYCOO,":[43],"novel":[45],"format":[47],"that":[48,90,140],"eliminates":[49],"communication":[51],"intermediate":[53],"values":[54],"external":[58,121,163],"memory":[59,122,152,164],"during":[60,112],"computation":[62],"modes.":[68],"Our":[69,169],"remapping":[70],"using":[74],"FLYCOO":[75],"also":[76,107],"balances":[77],"workload":[79],"among":[80],"multiple":[81,94,144],"Processing":[82],"Engines":[83],"(PEs).":[84],"parallel":[88],"algorithm":[89,106],"can":[91,141],"concurrently":[92,142],"process":[93,143],"partitions":[95],"independent":[100],"each":[102],"other.":[103],"The":[104],"proposed":[105],"orders":[108],"dynamically":[111],"runtime":[113],"increase":[115],"data":[117],"locality":[118,160],"accesses.":[123],"develop":[125],"custom":[127],"accelerator":[129],"design":[130],"with":[131,184],"(1)":[132],"PEs":[133],"consisting":[134],"collection":[137],"pipelines":[139],"elements":[145],"and":[150,158,177,188],"(2)":[151],"controllers":[153],"exploit":[155],"spatial":[157],"temporal":[159],"accesses":[165],"computation.":[168],"work":[170],"achieves":[171],"geometric":[173],"mean":[174],"8.8X":[176],"3.8X":[178],"speedup":[179],"compared":[183],"state-of-the-art":[186],"CPU":[187],"GPU":[189],"implementations":[190],"widely-used":[192],"real-world":[193],"datasets.":[196]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
