{"id":"https://openalex.org/W3204728131","doi":"https://doi.org/10.1145/3472456.3473522","title":"Recursion Brings Speedup to Out-of-Core TensorCore-based Linear Algebra Algorithms: A Case Study of Classic Gram-Schmidt QR Factorization","display_name":"Recursion Brings Speedup to Out-of-Core TensorCore-based Linear Algebra Algorithms: A Case Study of Classic Gram-Schmidt QR Factorization","publication_year":2021,"publication_date":"2021-08-09","ids":{"openalex":"https://openalex.org/W3204728131","doi":"https://doi.org/10.1145/3472456.3473522","mag":"3204728131"},"language":"en","primary_location":{"id":"doi:10.1145/3472456.3473522","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3472456.3473522","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"50th International Conference on Parallel Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5091290657","display_name":"Shaoshuai Zhang","orcid":"https://orcid.org/0000-0002-9525-1659"},"institutions":[{"id":"https://openalex.org/I44461941","display_name":"University of Houston","ror":"https://ror.org/048sx0r50","country_code":"US","type":"education","lineage":["https://openalex.org/I44461941"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Shaoshuai Zhang","raw_affiliation_strings":["University of Houston, United States of America"],"affiliations":[{"raw_affiliation_string":"University of Houston, United States of America","institution_ids":["https://openalex.org/I44461941"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5020822198","display_name":"Panruo Wu","orcid":"https://orcid.org/0000-0003-1859-3580"},"institutions":[{"id":"https://openalex.org/I44461941","display_name":"University of Houston","ror":"https://ror.org/048sx0r50","country_code":"US","type":"education","lineage":["https://openalex.org/I44461941"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Panruo Wu","raw_affiliation_strings":["University of Houston, United States of America"],"affiliations":[{"raw_affiliation_string":"University of Houston, United States of America","institution_ids":["https://openalex.org/I44461941"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5091290657"],"corresponding_institution_ids":["https://openalex.org/I44461941"],"apc_list":null,"apc_paid":null,"fwci":0.6908,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.674503,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"11"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9750000238418579,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/recursion","display_name":"Recursion (computer science)","score":0.6818670630455017},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6569806933403015},{"id":"https://openalex.org/keywords/factorization","display_name":"Factorization","score":0.6221035718917847},{"id":"https://openalex.org/keywords/linear-algebra","display_name":"Linear algebra","score":0.5845509767532349},{"id":"https://openalex.org/keywords/core","display_name":"Core (optical fiber)","score":0.5652275085449219},{"id":"https://openalex.org/keywords/qr-decomposition","display_name":"QR decomposition","score":0.5322149991989136},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.518585205078125},{"id":"https://openalex.org/keywords/algebra-over-a-field","display_name":"Algebra over a field","score":0.5054082870483398},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.5010058879852295},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.38247498869895935},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.30542534589767456},{"id":"https://openalex.org/keywords/pure-mathematics","display_name":"Pure mathematics","score":0.12792760133743286}],"concepts":[{"id":"https://openalex.org/C168773036","wikidata":"https://www.wikidata.org/wiki/Q264164","display_name":"Recursion (computer science)","level":2,"score":0.6818670630455017},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6569806933403015},{"id":"https://openalex.org/C187834632","wikidata":"https://www.wikidata.org/wiki/Q188804","display_name":"Factorization","level":2,"score":0.6221035718917847},{"id":"https://openalex.org/C139352143","wikidata":"https://www.wikidata.org/wiki/Q82571","display_name":"Linear algebra","level":2,"score":0.5845509767532349},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.5652275085449219},{"id":"https://openalex.org/C188060507","wikidata":"https://www.wikidata.org/wiki/Q653242","display_name":"QR decomposition","level":3,"score":0.5322149991989136},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.518585205078125},{"id":"https://openalex.org/C136119220","wikidata":"https://www.wikidata.org/wiki/Q1000660","display_name":"Algebra over a field","level":2,"score":0.5054082870483398},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5010058879852295},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.38247498869895935},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.30542534589767456},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.12792760133743286},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3472456.3473522","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3472456.3473522","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"50th International Conference on Parallel Processing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320309549","display_name":"University of Houston","ror":"https://ror.org/040vwpm13"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W1982891411","https://openalex.org/W2007768252","https://openalex.org/W2031350942","https://openalex.org/W2032052535","https://openalex.org/W2104373803","https://openalex.org/W2165647555","https://openalex.org/W2208150243","https://openalex.org/W2253627208","https://openalex.org/W2286003308","https://openalex.org/W2548744442","https://openalex.org/W2567731932","https://openalex.org/W2613275038","https://openalex.org/W2765439756","https://openalex.org/W2791673912","https://openalex.org/W2796649226","https://openalex.org/W2808102735","https://openalex.org/W2895305554","https://openalex.org/W2901549770","https://openalex.org/W3037775421","https://openalex.org/W3100417409","https://openalex.org/W3101543398","https://openalex.org/W3104528661"],"related_works":["https://openalex.org/W1980554623","https://openalex.org/W2962431673","https://openalex.org/W4313433564","https://openalex.org/W2084353305","https://openalex.org/W4285580088","https://openalex.org/W3215209251","https://openalex.org/W4225492969","https://openalex.org/W1536607540","https://openalex.org/W1997033059","https://openalex.org/W210173153"],"abstract_inverted_index":{"Out-of-core":[0],"processing":[1],"aims":[2],"to":[3,31,34,102,105,128,160],"handle":[4],"large":[5],"amount":[6,136],"of":[7,38,62,74,137],"data":[8,44,77,93,112,138],"when":[9],"the":[10,35,39,43,54,60,69,72,99,111,145],"memory":[11,48],"is":[12,79],"limited.":[13],"There":[14],"exists":[15],"several":[16],"out-of-core":[17,26],"applications":[18,27],"including":[19],"disk-memory":[20],"and":[21,76,109,140],"CPU-GPU":[22],"processing.":[23],"Ideally,":[24],"these":[25],"can":[28,50,89,156],"be":[29,32,51,90,103,158],"expected":[30],"close":[33],"peak":[36],"performance":[37],"in-core":[40,55],"computations,":[41],"if":[42],"movement":[45,78,94,113,139],"between":[46,71],"different":[47],"hierarchies":[49],"overlapped":[52],"by":[53,92,114],"computations":[56,75,163],"effectively.":[57],"However,":[58],"with":[59],"emergence":[61],"matrix":[63,162],"accelerators":[64],"such":[65,82,97],"as":[66,125],"TensorCore":[67],"GPU,":[68],"imbalance":[70],"speed":[73],"further":[80],"exacerbated,":[81],"that":[83],"even":[84],"high":[85],"computation":[86],"intensity":[87],"kernels":[88],"dominated":[91],"cost.":[95],"In":[96,116],"cases,":[98],"algorithms":[100],"need":[101],"redesigned":[104],"reduce":[106],"communication":[107],"volume":[108],"overlap":[110],"pipelines.":[115],"this":[117,154],"paper,":[118],"we":[119],"select":[120],"classic":[121],"Gram-Schmidt":[122],"QR":[123,148],"factorization":[124,149],"an":[126],"example":[127],"illustrate":[129],"our":[130],"recursive":[131],"strategy,":[132],"which":[133],"shows":[134],"smaller":[135],"higher":[141],"overlapping":[142],"ratio":[143],"than":[144],"conventional":[146],"blocking":[147],"algorithm.":[150],"The":[151],"results":[152],"suggest":[153],"technique":[155],"potentially":[157],"applied":[159],"broader":[161],"kernels.":[164]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
