{"id":"https://openalex.org/W7092203632","doi":"https://doi.org/10.1109/lca.2025.3622588","title":"Thread-Adaptive: High-Throughput Parallel Architectures of SLH-DSA on GPUs","display_name":"Thread-Adaptive: High-Throughput Parallel Architectures of SLH-DSA on GPUs","publication_year":2025,"publication_date":"2025-07-01","ids":{"openalex":"https://openalex.org/W7092203632","doi":"https://doi.org/10.1109/lca.2025.3622588"},"language":null,"primary_location":{"id":"doi:10.1109/lca.2025.3622588","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lca.2025.3622588","pdf_url":null,"source":{"id":"https://openalex.org/S17643076","display_name":"IEEE Computer Architecture Letters","issn_l":"1556-6056","issn":["1556-6056","1556-6064","2473-2575"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Computer Architecture Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Jiahao Xiang","orcid":"https://orcid.org/0009-0000-0418-4298"},"institutions":[{"id":"https://openalex.org/I149735164","display_name":"Hengyang Normal University","ror":"https://ror.org/006bvjm48","country_code":"CN","type":"education","lineage":["https://openalex.org/I149735164"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jiahao Xiang","raw_affiliation_strings":["Hunan Provincial Key Laboratory of Intelligent Information Processing and Application, Hunan Engineering Research Center of Cyberspace Security Technology and Applications, College of Computer Science and Technology, Hengyang Normal University, Hengyang, China"],"affiliations":[{"raw_affiliation_string":"Hunan Provincial Key Laboratory of Intelligent Information Processing and Application, Hunan Engineering Research Center of Cyberspace Security Technology and Applications, College of Computer Science and Technology, Hengyang Normal University, Hengyang, China","institution_ids":["https://openalex.org/I149735164"]}]},{"author_position":"last","author":{"id":null,"display_name":"Lang Li","orcid":"https://orcid.org/0000-0002-4832-4499"},"institutions":[{"id":"https://openalex.org/I149735164","display_name":"Hengyang Normal University","ror":"https://ror.org/006bvjm48","country_code":"CN","type":"education","lineage":["https://openalex.org/I149735164"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lang Li","raw_affiliation_strings":["Hunan Provincial Key Laboratory of Intelligent Information Processing and Application, Hunan Engineering Research Center of Cyberspace Security Technology and Applications, College of Computer Science and Technology, Hengyang Normal University, Hengyang, China"],"affiliations":[{"raw_affiliation_string":"Hunan Provincial Key Laboratory of Intelligent Information Processing and Application, Hunan Engineering Research Center of Cyberspace Security Technology and Applications, College of Computer Science and Technology, Hengyang Normal University, Hengyang, China","institution_ids":["https://openalex.org/I149735164"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I149735164"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.77897318,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"24","issue":"2","first_page":"329","last_page":"332"},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T10951","display_name":"Cryptographic Implementations and Security","score":0.7407000064849854,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10951","display_name":"Cryptographic Implementations and Security","score":0.7407000064849854,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11693","display_name":"Cryptography and Residue Arithmetic","score":0.11110000312328339,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10237","display_name":"Cryptography and Data Security","score":0.07209999859333038,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.6186000108718872},{"id":"https://openalex.org/keywords/cryptography","display_name":"Cryptography","score":0.520799994468689},{"id":"https://openalex.org/keywords/cryptographic-primitive","display_name":"Cryptographic primitive","score":0.43799999356269836},{"id":"https://openalex.org/keywords/multithreading","display_name":"Multithreading","score":0.4205000102519989},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.41830000281333923},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.3887999951839447},{"id":"https://openalex.org/keywords/instruction-set","display_name":"Instruction set","score":0.3774000108242035},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.35989999771118164},{"id":"https://openalex.org/keywords/coprocessor","display_name":"Coprocessor","score":0.3578999936580658}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8995000123977661},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6771000027656555},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.6186000108718872},{"id":"https://openalex.org/C178489894","wikidata":"https://www.wikidata.org/wiki/Q8789","display_name":"Cryptography","level":2,"score":0.520799994468689},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.4551999866962433},{"id":"https://openalex.org/C15927051","wikidata":"https://www.wikidata.org/wiki/Q246593","display_name":"Cryptographic primitive","level":4,"score":0.43799999356269836},{"id":"https://openalex.org/C201410400","wikidata":"https://www.wikidata.org/wiki/Q1064412","display_name":"Multithreading","level":3,"score":0.4205000102519989},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.41830000281333923},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.3887999951839447},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.3774000108242035},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.35989999771118164},{"id":"https://openalex.org/C86111242","wikidata":"https://www.wikidata.org/wiki/Q859595","display_name":"Coprocessor","level":2,"score":0.3578999936580658},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.35499998927116394},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.32420000433921814},{"id":"https://openalex.org/C107598950","wikidata":"https://www.wikidata.org/wiki/Q259864","display_name":"Microarchitecture","level":2,"score":0.31349998712539673},{"id":"https://openalex.org/C41138395","wikidata":"https://www.wikidata.org/wiki/Q928112","display_name":"POSIX Threads","level":3,"score":0.30059999227523804},{"id":"https://openalex.org/C2776834041","wikidata":"https://www.wikidata.org/wiki/Q25346349","display_name":"Execution model","level":2,"score":0.3000999987125397},{"id":"https://openalex.org/C133875982","wikidata":"https://www.wikidata.org/wiki/Q764810","display_name":"Shared memory","level":2,"score":0.29919999837875366},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.2955999970436096},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.2939999997615814},{"id":"https://openalex.org/C34165917","wikidata":"https://www.wikidata.org/wiki/Q188267","display_name":"Programming paradigm","level":2,"score":0.290800005197525},{"id":"https://openalex.org/C42992933","wikidata":"https://www.wikidata.org/wiki/Q691169","display_name":"Task parallelism","level":3,"score":0.2906999886035919},{"id":"https://openalex.org/C2780870223","wikidata":"https://www.wikidata.org/wiki/Q1004415","display_name":"Runtime system","level":2,"score":0.2842000126838684},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.28139999508857727},{"id":"https://openalex.org/C140763907","wikidata":"https://www.wikidata.org/wiki/Q2714055","display_name":"Instruction-level parallelism","level":3,"score":0.26330000162124634},{"id":"https://openalex.org/C103613024","wikidata":"https://www.wikidata.org/wiki/Q230924","display_name":"Stateless protocol","level":3,"score":0.26100000739097595}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lca.2025.3622588","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lca.2025.3622588","pdf_url":null,"source":{"id":"https://openalex.org/S17643076","display_name":"IEEE Computer Architecture Letters","issn_l":"1556-6056","issn":["1556-6056","1556-6064","2473-2575"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Computer Architecture Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":7,"referenced_works":["https://openalex.org/W2987483625","https://openalex.org/W3189004867","https://openalex.org/W4280552546","https://openalex.org/W4312634035","https://openalex.org/W4323644127","https://openalex.org/W4392502209","https://openalex.org/W4402451239"],"related_works":[],"abstract_inverted_index":{"The":[0],"emergence":[1],"of":[2,102],"quantum":[3],"computing":[4],"threatens":[5],"classical":[6],"cryptographic":[7,70,82],"systems,":[8],"necessitating":[9],"efficient":[10],"architectural":[11,21,48,115],"designs":[12],"for":[13,23,122],"post-quantum":[14],"algorithms.":[15],"This":[16],"paper":[17],"presents":[18],"a":[19,46,129],"novel":[20],"approach":[22],"implementing":[24],"the":[25,67,99,123,154],"FIPS":[26],"205":[27],"Stateless":[28],"Hash-based":[29],"Digital":[30],"Signature":[31],"Algorithm":[32],"(SLH-DSA)":[33],"on":[34,62,107],"GPUs":[35],"through":[36],"execution":[37,74,94,156],"model":[38],"optimizations":[39],"that":[40,56,96,113,138],"maximize":[41],"hardware":[42],"utilization.":[43],"We":[44],"introduce":[45],"two-tier":[47],"framework:":[49],"first,":[50],"an":[51,108],"Adaptive":[52],"Thread":[53],"Allocation":[54],"mechanism":[55],"dynamically":[57],"configures":[58],"thread-level":[59],"parallelism":[60],"based":[61],"empirical":[63],"performance":[64],"modeling,":[65],"optimizing":[66],"mapping":[68],"between":[69],"workloads":[71],"and":[72,93,149],"GPU":[73],"resources.":[75],"Second,":[76],"our":[77,114],"Function-Level":[78],"Parallelism":[79],"design":[80,116],"decomposes":[81],"components":[83],"into":[84],"fine-grained":[85],"computational":[86],"units":[87],"with":[88],"optimized":[89,145],"memory":[90,147],"access":[91],"patterns":[92],"flows":[95],"better":[97],"utilize":[98],"SIMT":[100],"architecture":[101],"modern":[103],"GPUs.":[104],"Performance":[105],"evaluation":[106],"NVIDIA":[109],"RTX":[110],"4090":[111],"demonstrates":[112],"achieves":[117],"62,239":[118],"signatures":[119],"per":[120],"second":[121],"SHA2":[124],"128f":[125],"parameter":[126],"set,":[127],"representing":[128],"1.16\u00d7":[130],"improvement":[131],"over":[132],"prior":[133],"implementations.":[134],"Architectural":[135],"analysis":[136],"reveals":[137],"this":[139],"throughput":[140],"enhancement":[141],"stems":[142],"primarily":[143],"from":[144],"thread":[146],"interactions":[148],"reduced":[150],"resource":[151],"contention":[152],"in":[153],"GPU's":[155],"units.":[157]},"counts_by_year":[],"updated_date":"2025-11-06T23:17:08.748858","created_date":"2025-10-17T00:00:00"}
