{"id":"https://openalex.org/W4308090781","doi":"https://doi.org/10.1109/hpec55821.2022.9926288","title":"Distributed Out-of-Memory SVD on CPU/GPU Architectures","display_name":"Distributed Out-of-Memory SVD on CPU/GPU Architectures","publication_year":2022,"publication_date":"2022-09-19","ids":{"openalex":"https://openalex.org/W4308090781","doi":"https://doi.org/10.1109/hpec55821.2022.9926288"},"language":"en","primary_location":{"id":"doi:10.1109/hpec55821.2022.9926288","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec55821.2022.9926288","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5070040374","display_name":"Ismael Boureima","orcid":null},"institutions":[{"id":"https://openalex.org/I4210131040","display_name":"Los Alamos National Security (United States)","ror":"https://ror.org/037ddaj94","country_code":"US","type":"company","lineage":["https://openalex.org/I4210131040"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Ismael Boureima","raw_affiliation_strings":["LANL,Theoretical Division,Los Alamos,U.S","Theoretical Division, LANL, Los Alamos, U.S"],"affiliations":[{"raw_affiliation_string":"LANL,Theoretical Division,Los Alamos,U.S","institution_ids":["https://openalex.org/I4210131040"]},{"raw_affiliation_string":"Theoretical Division, LANL, Los Alamos, U.S","institution_ids":["https://openalex.org/I4210131040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012614331","display_name":"Manish Bhattarai","orcid":"https://orcid.org/0000-0002-1421-3643"},"institutions":[{"id":"https://openalex.org/I4210131040","display_name":"Los Alamos National Security (United States)","ror":"https://ror.org/037ddaj94","country_code":"US","type":"company","lineage":["https://openalex.org/I4210131040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Manish Bhattarai","raw_affiliation_strings":["LANL,Theoretical Division,Los Alamos,U.S","Theoretical Division, LANL, Los Alamos, U.S"],"affiliations":[{"raw_affiliation_string":"LANL,Theoretical Division,Los Alamos,U.S","institution_ids":["https://openalex.org/I4210131040"]},{"raw_affiliation_string":"Theoretical Division, LANL, Los Alamos, U.S","institution_ids":["https://openalex.org/I4210131040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066261993","display_name":"Maksim E. Eren","orcid":"https://orcid.org/0000-0002-4362-0256"},"institutions":[{"id":"https://openalex.org/I4210131040","display_name":"Los Alamos National Security (United States)","ror":"https://ror.org/037ddaj94","country_code":"US","type":"company","lineage":["https://openalex.org/I4210131040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Maksim E. Eren","raw_affiliation_strings":["LANL,Theoretical Division,Los Alamos,U.S","Theoretical Division, LANL, Los Alamos, U.S"],"affiliations":[{"raw_affiliation_string":"LANL,Theoretical Division,Los Alamos,U.S","institution_ids":["https://openalex.org/I4210131040"]},{"raw_affiliation_string":"Theoretical Division, LANL, Los Alamos, U.S","institution_ids":["https://openalex.org/I4210131040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071928134","display_name":"Nick Solovyev","orcid":null},"institutions":[{"id":"https://openalex.org/I4210131040","display_name":"Los Alamos National Security (United States)","ror":"https://ror.org/037ddaj94","country_code":"US","type":"company","lineage":["https://openalex.org/I4210131040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nick Solovyev","raw_affiliation_strings":["LANL,Theoretical Division,Los Alamos,U.S","Theoretical Division, LANL, Los Alamos, U.S"],"affiliations":[{"raw_affiliation_string":"LANL,Theoretical Division,Los Alamos,U.S","institution_ids":["https://openalex.org/I4210131040"]},{"raw_affiliation_string":"Theoretical Division, LANL, Los Alamos, U.S","institution_ids":["https://openalex.org/I4210131040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057625550","display_name":"Hristo Djidjev","orcid":"https://orcid.org/0000-0001-9286-8824"},"institutions":[{"id":"https://openalex.org/I4210131040","display_name":"Los Alamos National Security (United States)","ror":"https://ror.org/037ddaj94","country_code":"US","type":"company","lineage":["https://openalex.org/I4210131040"]},{"id":"https://openalex.org/I4210163063","display_name":"Institute of Information and Communication Technologies","ror":"https://ror.org/05fpsjc82","country_code":"BG","type":"facility","lineage":["https://openalex.org/I24768866","https://openalex.org/I4210163063"]}],"countries":["BG","US"],"is_corresponding":false,"raw_author_name":"Hristo Djidjev","raw_affiliation_strings":["Information Systems LANL,Los Alamos,U.S","Information Systems LANL, Los Alamos, U.S","IICT, Sofia, Bulgaria"],"affiliations":[{"raw_affiliation_string":"Information Systems LANL,Los Alamos,U.S","institution_ids":["https://openalex.org/I4210131040"]},{"raw_affiliation_string":"Information Systems LANL, Los Alamos, U.S","institution_ids":["https://openalex.org/I4210131040"]},{"raw_affiliation_string":"IICT, Sofia, Bulgaria","institution_ids":["https://openalex.org/I4210163063"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079029372","display_name":"Boian S. Alexandrov","orcid":"https://orcid.org/0000-0001-8636-4603"},"institutions":[{"id":"https://openalex.org/I4210131040","display_name":"Los Alamos National Security (United States)","ror":"https://ror.org/037ddaj94","country_code":"US","type":"company","lineage":["https://openalex.org/I4210131040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Boian S. Alexandrov","raw_affiliation_strings":["LANL,Theoretical Division,Los Alamos,U.S","Theoretical Division, LANL, Los Alamos, U.S"],"affiliations":[{"raw_affiliation_string":"LANL,Theoretical Division,Los Alamos,U.S","institution_ids":["https://openalex.org/I4210131040"]},{"raw_affiliation_string":"Theoretical Division, LANL, Los Alamos, U.S","institution_ids":["https://openalex.org/I4210131040"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5070040374"],"corresponding_institution_ids":["https://openalex.org/I4210131040"],"apc_list":null,"apc_paid":null,"fwci":0.7304,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.74070022,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10792","display_name":"Matrix Theory and Algorithms","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10792","display_name":"Matrix Theory and Algorithms","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/singular-value-decomposition","display_name":"Singular value decomposition","score":0.8454903364181519},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6828521490097046},{"id":"https://openalex.org/keywords/matrix-decomposition","display_name":"Matrix decomposition","score":0.4875950217247009},{"id":"https://openalex.org/keywords/matrix","display_name":"Matrix (chemical analysis)","score":0.48556825518608093},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.43867388367652893},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.38070207834243774},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3795376121997833},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3369661569595337}],"concepts":[{"id":"https://openalex.org/C22789450","wikidata":"https://www.wikidata.org/wiki/Q420904","display_name":"Singular value decomposition","level":2,"score":0.8454903364181519},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6828521490097046},{"id":"https://openalex.org/C42355184","wikidata":"https://www.wikidata.org/wiki/Q1361088","display_name":"Matrix decomposition","level":3,"score":0.4875950217247009},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.48556825518608093},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.43867388367652893},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.38070207834243774},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3795376121997833},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3369661569595337},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec55821.2022.9926288","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec55821.2022.9926288","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W104758865","https://openalex.org/W2073835183","https://openalex.org/W2535374105","https://openalex.org/W2564936110","https://openalex.org/W2613275038","https://openalex.org/W2736056409","https://openalex.org/W2765309601","https://openalex.org/W2765887041","https://openalex.org/W2900234561","https://openalex.org/W3003257820","https://openalex.org/W3017126053","https://openalex.org/W3034976662","https://openalex.org/W3035965352","https://openalex.org/W3099878876","https://openalex.org/W3117843218","https://openalex.org/W3164436820","https://openalex.org/W3201568135","https://openalex.org/W4225919909","https://openalex.org/W4226405319","https://openalex.org/W6604293180","https://openalex.org/W6774349369","https://openalex.org/W6810741609"],"related_works":["https://openalex.org/W2782904003","https://openalex.org/W4226434912","https://openalex.org/W2118633810","https://openalex.org/W2150953077","https://openalex.org/W2002598339","https://openalex.org/W2920931670","https://openalex.org/W2133814741","https://openalex.org/W1995410415","https://openalex.org/W3108343813","https://openalex.org/W4287663927"],"abstract_inverted_index":{"We":[0,152,244],"propose":[1,57],"an":[2,58,239],"efficient,":[3],"distributed,":[4],"out-of-memory":[5,134],"implementation":[6,59,130,240],"of":[7,23,38,50,60,99,157,248,252,261,267,272],"the":[8,32,36,39,45,51,64,82,97,100,137,148,154,167,209,246],"truncated":[9,70],"singular":[10,33,40,71,74],"value":[11],"decomposition":[12],"(t-SVD)":[13],"for":[14,133],"heterogeneous":[15],"high":[16],"performance":[17],"computing":[18],"(HPC)":[19],"systems.":[20],"Various":[21],"implementations":[22],"SVD":[24,61,254],"have":[25],"been":[26],"proposed,":[27],"with":[28,96,181,195,212,229,241,270],"most":[29],"only":[30],"estimate":[31],"values":[34,72],"as":[35],"estimation":[37,76],"vectors":[41,75],"can":[42,108],"significantly":[43,227],"increase":[44],"time":[46],"and":[47,73,117,126,175,185,217,222,264],"memory":[48,138,155],"complexity":[49,156],"algorithm.":[52],"In":[53,219],"this":[54],"work,":[55],"we":[56,176,201],"based":[62,205],"on":[63],"power":[65,83],"method,":[66],"which":[67,107],"is":[68,115,124,131,145],"a":[69,88,142,163],"method.":[77],"Memory":[78],"utilization":[79],"bottlenecks":[80],"in":[81,238,275],"method":[84],"used":[85],"to":[86,140,207,256],"decompose":[87,258],"matrix":[89,102,144,224,260,266],"<tex":[90,103,112,121,158],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[91,104,113,122,159],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$A$</tex>":[92,114,123],"are":[93,170,226],"typically":[94],"associated":[95,180,211],"computation":[98],"Gram":[101],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$A^{T}A$</tex>":[105,160],",":[106],"be":[109],"significant":[110],"when":[111,120,235],"large":[116],"dense,":[118],"or":[119],"super-large":[125],"sparse.":[127],"The":[128],"proposed":[129],"optimized":[132,203],"problems":[135],"where":[136,166],"required":[139],"factorize":[141],"given":[143],"greater":[146],"than":[147],"available":[149],"GPU":[150,230],"memory.":[151],"reduce":[153,208],"by":[161,173,190],"using":[162,197],"batching":[164],"strategy":[165],"intermediate":[168],"factors":[169],"computed":[171],"block":[172],"block,":[174],"hide":[177],"I/O":[178],"latency":[179,210],"both":[182],"host-to-device":[183],"(H2D)":[184],"device-to-host":[186],"(D2H)":[187],"batch":[188,193],"copies":[189],"overlapping":[191],"each":[192],"copy":[194],"compute":[196],"CUDA":[198],"streams.":[199],"Furthermore,":[200],"use":[202],"NCCL":[204],"communicators":[206],"collective":[213],"communications":[214],"(both":[215],"intra-node":[216],"inter-node).":[218],"addition,":[220],"sparse":[221,265],"dense":[223,259,276],"multiplications":[225],"accelerated":[228],"cores":[231,234],"(or":[232],"tensors":[233],"available),":[236],"resulting":[237],"good":[242],"scaling.":[243],"demonstrate":[245],"scalability":[247],"our":[249],"distributed":[250],"out":[251],"core":[253],"algorithm":[255],"successfully":[257],"size":[262,271],"1TB":[263],"le-6":[268],"sparsity":[269],"128":[273],"PB":[274],"format.":[277]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
