{"id":"https://openalex.org/W4409248690","doi":"https://doi.org/10.1109/hpca61900.2025.00091","title":"WarpDrive: GPU-Based Fully Homomorphic Encryption Acceleration Leveraging Tensor and CUDA Cores","display_name":"WarpDrive: GPU-Based Fully Homomorphic Encryption Acceleration Leveraging Tensor and CUDA Cores","publication_year":2025,"publication_date":"2025-03-01","ids":{"openalex":"https://openalex.org/W4409248690","doi":"https://doi.org/10.1109/hpca61900.2025.00091"},"language":"en","primary_location":{"id":"doi:10.1109/hpca61900.2025.00091","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca61900.2025.00091","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5079491959","display_name":"Fan Guang","orcid":"https://orcid.org/0000-0002-5389-0823"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Guang Fan","raw_affiliation_strings":["Ant Group,Hangzhou,China"],"affiliations":[{"raw_affiliation_string":"Ant Group,Hangzhou,China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101985756","display_name":"Mingzhe Zhang","orcid":"https://orcid.org/0000-0003-2279-7025"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mingzhe Zhang","raw_affiliation_strings":["Ant Group,Hangzhou,China"],"affiliations":[{"raw_affiliation_string":"Ant Group,Hangzhou,China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082689200","display_name":"Fangyu Zheng","orcid":"https://orcid.org/0000-0003-0490-2485"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fangyu Zheng","raw_affiliation_strings":["University of Chinese Academy of Sciences,School of Cryptology,Beijing,China"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences,School of Cryptology,Beijing,China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053382926","display_name":"Shengyu Fan","orcid":null},"institutions":[{"id":"https://openalex.org/I4210156404","display_name":"Institute of Information Engineering","ror":"https://ror.org/04r53se39","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210156404"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shengyu Fan","raw_affiliation_strings":["Institute of Information Engineering, CAS,Key Laboratory of Cyberspace Security Defense,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Institute of Information Engineering, CAS,Key Laboratory of Cyberspace Security Defense,Beijing,China","institution_ids":["https://openalex.org/I4210156404"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111360574","display_name":"Tian Zhou","orcid":"https://orcid.org/0009-0009-1284-0182"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tian Zhou","raw_affiliation_strings":["University of Chinese Academy of Sciences,School of Cryptology,Beijing,China"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences,School of Cryptology,Beijing,China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100546294","display_name":"Xianglong Deng","orcid":"https://orcid.org/0009-0002-2058-5109"},"institutions":[{"id":"https://openalex.org/I4210156404","display_name":"Institute of Information Engineering","ror":"https://ror.org/04r53se39","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210156404"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xianglong Deng","raw_affiliation_strings":["Institute of Information Engineering, CAS,Key Laboratory of Cyberspace Security Defense,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Institute of Information Engineering, CAS,Key Laboratory of Cyberspace Security Defense,Beijing,China","institution_ids":["https://openalex.org/I4210156404"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104257371","display_name":"Wenxu Tang","orcid":"https://orcid.org/0009-0009-1761-3683"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenxu Tang","raw_affiliation_strings":["University of Chinese Academy of Sciences,School of Cryptology,Beijing,China"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences,School of Cryptology,Beijing,China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046524020","display_name":"Liang Kong","orcid":"https://orcid.org/0000-0001-9871-0358"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang Kong","raw_affiliation_strings":["Ant Group,Hangzhou,China"],"affiliations":[{"raw_affiliation_string":"Ant Group,Hangzhou,China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100780795","display_name":"Yixuan Song","orcid":"https://orcid.org/0000-0001-8226-367X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yixuan Song","raw_affiliation_strings":["Ant Group,Hangzhou,China"],"affiliations":[{"raw_affiliation_string":"Ant Group,Hangzhou,China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049135681","display_name":"Shoumeng Yan","orcid":"https://orcid.org/0009-0007-9580-5395"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shoumeng Yan","raw_affiliation_strings":["Ant Group,Hangzhou,China"],"affiliations":[{"raw_affiliation_string":"Ant Group,Hangzhou,China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5079491959"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":32.8454,"has_fulltext":false,"cited_by_count":15,"citation_normalized_percentile":{"value":0.99824453,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1187","last_page":"1200"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.963699996471405,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.963699996471405,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11693","display_name":"Cryptography and Residue Arithmetic","score":0.9552000164985657,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9142000079154968,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.8978786468505859},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8019818663597107},{"id":"https://openalex.org/keywords/homomorphic-encryption","display_name":"Homomorphic encryption","score":0.8014435172080994},{"id":"https://openalex.org/keywords/acceleration","display_name":"Acceleration","score":0.6980957388877869},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.5820194482803345},{"id":"https://openalex.org/keywords/graphics-processing-unit","display_name":"Graphics processing unit","score":0.526073157787323},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5253074765205383},{"id":"https://openalex.org/keywords/tensor","display_name":"Tensor (intrinsic definition)","score":0.4270877540111542},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.4081733226776123},{"id":"https://openalex.org/keywords/encryption","display_name":"Encryption","score":0.3639316260814667},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.2642817795276642},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.25576767325401306},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.10914599895477295},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.0720430314540863},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.06829959154129028}],"concepts":[{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.8978786468505859},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8019818663597107},{"id":"https://openalex.org/C158338273","wikidata":"https://www.wikidata.org/wiki/Q2154943","display_name":"Homomorphic encryption","level":3,"score":0.8014435172080994},{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.6980957388877869},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.5820194482803345},{"id":"https://openalex.org/C2779851693","wikidata":"https://www.wikidata.org/wiki/Q183484","display_name":"Graphics processing unit","level":2,"score":0.526073157787323},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5253074765205383},{"id":"https://openalex.org/C155281189","wikidata":"https://www.wikidata.org/wiki/Q3518150","display_name":"Tensor (intrinsic definition)","level":2,"score":0.4270877540111542},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.4081733226776123},{"id":"https://openalex.org/C148730421","wikidata":"https://www.wikidata.org/wiki/Q141090","display_name":"Encryption","level":2,"score":0.3639316260814667},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.2642817795276642},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.25576767325401306},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.10914599895477295},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0720430314540863},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.06829959154129028},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C74650414","wikidata":"https://www.wikidata.org/wiki/Q11397","display_name":"Classical mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpca61900.2025.00091","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca61900.2025.00091","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","score":0.8199999928474426,"display_name":"Affordable and clean energy"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W913176383","https://openalex.org/W1498685334","https://openalex.org/W2177209050","https://openalex.org/W2309997581","https://openalex.org/W2768174108","https://openalex.org/W2794634409","https://openalex.org/W2934510082","https://openalex.org/W2942255051","https://openalex.org/W2966536036","https://openalex.org/W2969350772","https://openalex.org/W3006531732","https://openalex.org/W3012235108","https://openalex.org/W3012457899","https://openalex.org/W3094696138","https://openalex.org/W3116767319","https://openalex.org/W3159746013","https://openalex.org/W3207326900","https://openalex.org/W4205474236","https://openalex.org/W4236580217","https://openalex.org/W4237773356","https://openalex.org/W4280633999","https://openalex.org/W4281609193","https://openalex.org/W4281792301","https://openalex.org/W4313855966","https://openalex.org/W4324007231","https://openalex.org/W4360831809","https://openalex.org/W4360831815","https://openalex.org/W4360831824","https://openalex.org/W4360831964","https://openalex.org/W4380881143","https://openalex.org/W4384705451","https://openalex.org/W4386942772","https://openalex.org/W4388923270","https://openalex.org/W4388925834","https://openalex.org/W4389395031","https://openalex.org/W4391661643","https://openalex.org/W4392753823","https://openalex.org/W6636949950","https://openalex.org/W6778434676","https://openalex.org/W6853033738","https://openalex.org/W6858028832","https://openalex.org/W6875470641","https://openalex.org/W7067201269"],"related_works":["https://openalex.org/W1963859303","https://openalex.org/W2364044215","https://openalex.org/W2389600408","https://openalex.org/W240129890","https://openalex.org/W3048701459","https://openalex.org/W2149078538","https://openalex.org/W2370314112","https://openalex.org/W1912958759","https://openalex.org/W2792081825","https://openalex.org/W2893308117"],"abstract_inverted_index":{"The":[0],"application":[1],"of":[2,51,61,89,117,136,140,179,191,210,236],"Fully":[3],"Homomorphic":[4],"Encryption":[5],"(FHE)":[6],"is":[7],"rapidly":[8],"gaining":[9],"traction":[10],"as":[11],"a":[12,68,110,189,224],"means":[13],"to":[14,103,150,159],"maintain":[15],"data":[16],"confidentiality":[17],"while":[18],"performing":[19],"computations":[20],"on":[21],"encrypted":[22],"data.":[23],"Given":[24],"the":[25,48,87,104,114,124,128,147,164,177,203,218,234],"accessibility":[26],"and":[27,58,79,95,120,154,196,213],"computational":[28],"power,":[29],"GPUs":[30],"hold":[31],"promise":[32],"for":[33,71,127,166,194,199],"significantly":[34,85],"accelerating":[35],"FHE":[36,73,220],"operations.":[37,181],"However,":[38],"existing":[39],"GPU-based":[40,72],"acceleration":[41],"solutions":[42],"face":[43],"several":[44],"formidable":[45],"challenges,":[46],"notably":[47],"extensive":[49],"occurrence":[50],"pipeline":[52,96],"stalls":[53,97],"induced":[54],"by":[55,91,98,208],"memory":[56,81,155],"access":[57,82],"suboptimal":[59],"harnessing":[60],"GPU":[62,205],"hardware.":[63],"This":[64],"paper":[65],"presents":[66],"WarpDrive,":[67],"comprehensive":[69],"framework":[70,111],"acceleration.":[74],"Through":[75],"sophisticated":[76],"computation":[77,153],"decomposition":[78],"fine-grained":[80],"design,":[83],"WarpDrive":[84,108,187],"reduces":[86],"number":[88],"instructions":[90],"$\\mathbf{7":[92],"3":[93],"\\%}$":[94,101],"$\\mathbf{8":[99],"6":[100],"compared":[102],"state-of-the-art":[105,204],"solution.":[106],"Additionally,":[107],"features":[109],"that":[112,133,135,172],"supports":[113],"concurrent":[115],"utilization":[116],"CUDA":[118],"Cores":[119,122],"Tensor":[121],"within":[123],"NTT":[125,195],"operation,":[126],"first":[129],"time,":[130],"achieving":[131,157],"performance":[132,178,235],"surpasses":[134],"any":[137],"single":[138],"type":[139],"processing":[141],"unit.":[142],"Furthermore,":[143],"we":[144],"fully":[145],"exploit":[146],"intra-ciphertext":[148],"parallelism":[149],"elevate":[151],"both":[152],"utilization,":[156],"up":[158],"$2.12":[160],"\\times$":[161,212,233],"improvements":[162],"without":[163],"need":[165],"ciphertext":[167],"batching.":[168],"Experimental":[169],"results":[170],"demonstrate":[171],"our":[173,229],"optimizations":[174],"highly":[175],"enhance":[176],"homomorphic":[180,200],"On":[182],"an":[183],"NVIDIA":[184],"A100":[185],"GPU,":[186],"achieves":[188,231],"throughput":[190],"1218":[192],"KOPS":[193,198],"305":[197],"multiplication,":[201],"outperforming":[202],"solution":[206],"(TensorFHE)":[207],"factors":[209],"$13.4":[211],"$3.5":[214],"\\times$,":[215],"respectively.":[216],"For":[217],"specific":[219],"workload,":[221],"even":[222],"under":[223],"much":[225],"smaller":[226],"batch":[227],"size,":[228],"approach":[230],"$2.8":[232],"TensorFHE.":[237]},"counts_by_year":[{"year":2026,"cited_by_count":11},{"year":2025,"cited_by_count":4}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
