{"id":"https://openalex.org/W4414898454","doi":"https://doi.org/10.1109/cluster59342.2025.11186494","title":"Accelerating Key-Value Data Structures Using AVX-512 SIMD Extensions","display_name":"Accelerating Key-Value Data Structures Using AVX-512 SIMD Extensions","publication_year":2025,"publication_date":"2025-09-02","ids":{"openalex":"https://openalex.org/W4414898454","doi":"https://doi.org/10.1109/cluster59342.2025.11186494"},"language":"en","primary_location":{"id":"doi:10.1109/cluster59342.2025.11186494","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cluster59342.2025.11186494","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Cluster Computing (CLUSTER)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://pure.qub.ac.uk/en/publications/f682e709-15b6-4f63-93d5-1022e4a9e849","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063735421","display_name":"M. Reza HoseinyFarahabady","orcid":"https://orcid.org/0000-0002-7851-9377"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]},{"id":"https://openalex.org/I129604602","display_name":"The University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"MohammadReza HoseinyFarahabady","raw_affiliation_strings":["The University of Sydney, School of Computer Science,Centre for Distributed and High Performance Computing,Sydney,Australia,NSW 2006"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The University of Sydney, School of Computer Science,Centre for Distributed and High Performance Computing,Sydney,Australia,NSW 2006","institution_ids":["https://openalex.org/I129604602","https://openalex.org/I114017466"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048747481","display_name":"Javid Taheri","orcid":"https://orcid.org/0000-0001-9194-010X"},"institutions":[{"id":"https://openalex.org/I43968019","display_name":"Karlstad University","ror":"https://ror.org/05s754026","country_code":"SE","type":"education","lineage":["https://openalex.org/I43968019"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Javid Taheri","raw_affiliation_strings":["Karlstad University,Department of Mathematics and Computer Science,Karlstad,Sweden"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Karlstad University,Department of Mathematics and Computer Science,Karlstad,Sweden","institution_ids":["https://openalex.org/I43968019"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015993565","display_name":"Albert Y. Zomaya","orcid":"https://orcid.org/0000-0002-3090-1059"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]},{"id":"https://openalex.org/I129604602","display_name":"The University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Albert Y. Zomaya","raw_affiliation_strings":["The University of Sydney, School of Computer Science,Centre for Distributed and High Performance Computing,Sydney,Australia,NSW 2006"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The University of Sydney, School of Computer Science,Centre for Distributed and High Performance Computing,Sydney,Australia,NSW 2006","institution_ids":["https://openalex.org/I129604602","https://openalex.org/I114017466"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.2878454,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"12"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9868999719619751,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9868999719619751,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.972000002861023,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9595999717712402,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.6532999873161316},{"id":"https://openalex.org/keywords/hash-function","display_name":"Hash function","score":0.5679000020027161},{"id":"https://openalex.org/keywords/x86","display_name":"x86","score":0.5217999815940857},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.48820000886917114},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.48730000853538513},{"id":"https://openalex.org/keywords/instruction-set","display_name":"Instruction set","score":0.4860999882221222},{"id":"https://openalex.org/keywords/disjoint-sets","display_name":"Disjoint sets","score":0.4830000102519989},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.45899999141693115},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.45249998569488525}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8518999814987183},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7466999888420105},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.6532999873161316},{"id":"https://openalex.org/C99138194","wikidata":"https://www.wikidata.org/wiki/Q183427","display_name":"Hash function","level":2,"score":0.5679000020027161},{"id":"https://openalex.org/C170723468","wikidata":"https://www.wikidata.org/wiki/Q182933","display_name":"x86","level":3,"score":0.5217999815940857},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.48820000886917114},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.48730000853538513},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.4860999882221222},{"id":"https://openalex.org/C45340560","wikidata":"https://www.wikidata.org/wiki/Q215382","display_name":"Disjoint sets","level":2,"score":0.4830000102519989},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.45899999141693115},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.45249998569488525},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.4392000138759613},{"id":"https://openalex.org/C67388219","wikidata":"https://www.wikidata.org/wiki/Q207440","display_name":"Hash table","level":3,"score":0.39649999141693115},{"id":"https://openalex.org/C2781357197","wikidata":"https://www.wikidata.org/wiki/Q5757597","display_name":"High memory","level":2,"score":0.3666999936103821},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.3562999963760376},{"id":"https://openalex.org/C144240696","wikidata":"https://www.wikidata.org/wiki/Q367204","display_name":"Address space","level":2,"score":0.3427000045776367},{"id":"https://openalex.org/C87431388","wikidata":"https://www.wikidata.org/wiki/Q2070573","display_name":"Perfect hash function","level":4,"score":0.3366999924182892},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.29330000281333923},{"id":"https://openalex.org/C2780728072","wikidata":"https://www.wikidata.org/wiki/Q5297","display_name":"Microprocessor","level":2,"score":0.29030001163482666},{"id":"https://openalex.org/C55526617","wikidata":"https://www.wikidata.org/wiki/Q719375","display_name":"Operand","level":2,"score":0.28619998693466187},{"id":"https://openalex.org/C189930140","wikidata":"https://www.wikidata.org/wiki/Q1112878","display_name":"CAS latency","level":4,"score":0.27720001339912415},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.2770000100135803},{"id":"https://openalex.org/C111696304","wikidata":"https://www.wikidata.org/wiki/Q2303697","display_name":"Sorting","level":2,"score":0.27230000495910645},{"id":"https://openalex.org/C134835016","wikidata":"https://www.wikidata.org/wiki/Q690265","display_name":"Lookup table","level":2,"score":0.26840001344680786},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.26159998774528503},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.2612000107765198},{"id":"https://openalex.org/C153247305","wikidata":"https://www.wikidata.org/wiki/Q835713","display_name":"Memory address","level":3,"score":0.25029999017715454}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/cluster59342.2025.11186494","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cluster59342.2025.11186494","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Cluster Computing (CLUSTER)","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.qub.ac.uk/portal:openaire/f682e709-15b6-4f63-93d5-1022e4a9e849","is_oa":true,"landing_page_url":"https://pure.qub.ac.uk/en/publications/f682e709-15b6-4f63-93d5-1022e4a9e849","pdf_url":null,"source":{"id":"https://openalex.org/S4306402319","display_name":"Research Portal (Queen's University Belfast)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I126231945","host_organization_name":"Queen's University Belfast","host_organization_lineage":["https://openalex.org/I126231945"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Hoseinyfarahabady, M R, Taheri, J & Zomaya, A Y 2025, Accelerating key-value data structures using AVX-512 SIMD extensions. in Proceedings of the 2025 IEEE International Conference on Cluster Computing, CLUSTER 2025. Proceedings - IEEE International Conference on Cluster Computing, ICCC, Institute of Electrical and Electronics Engineers Inc., 2025 IEEE International Conference on Cluster Computing, CLUSTER 2025, Edinburgh, United Kingdom, 03/09/2025. https://doi.org/10.1109/CLUSTER59342.2025.11186494","raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":{"id":"pmh:oai:pure.qub.ac.uk/portal:openaire/f682e709-15b6-4f63-93d5-1022e4a9e849","is_oa":true,"landing_page_url":"https://pure.qub.ac.uk/en/publications/f682e709-15b6-4f63-93d5-1022e4a9e849","pdf_url":null,"source":{"id":"https://openalex.org/S4306402319","display_name":"Research Portal (Queen's University Belfast)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I126231945","host_organization_name":"Queen's University Belfast","host_organization_lineage":["https://openalex.org/I126231945"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Hoseinyfarahabady, M R, Taheri, J & Zomaya, A Y 2025, Accelerating key-value data structures using AVX-512 SIMD extensions. in Proceedings of the 2025 IEEE International Conference on Cluster Computing, CLUSTER 2025. Proceedings - IEEE International Conference on Cluster Computing, ICCC, Institute of Electrical and Electronics Engineers Inc., 2025 IEEE International Conference on Cluster Computing, CLUSTER 2025, Edinburgh, United Kingdom, 03/09/2025. https://doi.org/10.1109/CLUSTER59342.2025.11186494","raw_type":"info:eu-repo/semantics/conferenceObject"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6598234974","display_name":null,"funder_award_id":"NI220100111","funder_id":"https://openalex.org/F4320337300","funder_display_name":"Office of National Intelligence"}],"funders":[{"id":"https://openalex.org/F4320320966","display_name":"University of Sydney","ror":"https://ror.org/0384j8v12"},{"id":"https://openalex.org/F4320337300","display_name":"Office of National Intelligence","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W2055754268","https://openalex.org/W2077229436","https://openalex.org/W3006086507","https://openalex.org/W3011351775","https://openalex.org/W3048839782","https://openalex.org/W3176017347","https://openalex.org/W4255148642","https://openalex.org/W4372265742","https://openalex.org/W4386125394"],"related_works":[],"abstract_inverted_index":{"Advanced":[0],"Vector":[1],"Extensions":[2],"512":[3],"(AVX-512),":[4],"a":[5,29,38,160,213,236],"modern":[6],"SIMD":[7],"instruction":[8,31],"set":[9],"for":[10,43,196,235,256],"x86":[11],"architectures,":[12],"enables":[13],"data-level":[14],"parallelism":[15],"through":[16],"512-bit":[17],"wide":[18],"ZMM":[19],"registers":[20],"capable":[21],"of":[22,138,209,266,284],"processing":[23],"multiple":[24],"data":[25],"elements":[26],"concurrently":[27],"within":[28,136],"single":[30],"cycle.":[32],"In":[33],"this":[34,107],"study,":[35],"we":[36],"present":[37],"high-throughput,":[39],"lock-free,":[40],"in-memory":[41],"architecture":[42],"key-value":[44],"data-stores":[45],"that":[46,67,106,263],"exploits":[47],"AVX-512":[48,122,267],"vector":[49,246,275],"operations":[50,54],"to":[51,85,180,187,216],"accelerate":[52],"fundamental":[53],"such":[55,269],"as":[56,212,253,270],"insertion":[57,112,134],"and":[58,77,79,98,159,174,178,189,199,226,241,248,277],"lookup.":[59],"Our":[60,232],"design":[61],"introduces":[62],"an":[63],"optimized":[64,128,142],"memory":[65,117,129,250,272],"layout":[66,91],"partitions":[68],"the":[69,110,207,282],"key":[70,93,202],"space":[71],"into":[72],"two":[73],"disjoint":[74],"regions":[75],"(primary":[76],"secondary)":[78],"employs":[80],"three":[81],"independent":[82],"hash":[83,144,176],"functions":[84],"identify":[86],"candidate":[87],"slots.":[88],"This":[89],"asymmetric":[90],"improves":[92],"distribution,":[94],"reduces":[95],"collision":[96],"probability,":[97],"enhances":[99],"overall":[100],"lookup":[101],"efficiency.":[102],"Experimental":[103],"evaluation":[104],"shows":[105],"strategy":[108],"yields":[109],"lowest":[111],"failure":[113],"rate":[114],"among":[115],"tested":[116],"partitioning":[118],"schemes.":[119],"By":[120],"leveraging":[121],"instructions":[123],"in":[124,220,239],"combination":[125],"with":[126,155],"most":[127],"layout,":[130],"our":[131,164],"implementation":[132],"achieves":[133],"throughput":[135],"6%":[137],"Intel":[139],"TBB's":[140],"highly":[141],"multithreaded":[143],"map,":[145],"despite":[146],"avoiding":[147],"explicit":[148],"synchronization":[149,224],"or":[150],"thread-level":[151,217],"parallelism.":[152],"Under":[153],"workloads":[154],"550":[156],"million":[157],"entries":[158],"90%":[161],"miss":[162],"rate,":[163],"approach":[165],"delivers":[166],"4.0-5.1x":[167],"speedup":[168],"over":[169],"standard":[170],"STL,":[171],"Boost,":[172],"Robin-Hood,":[173],"Abseil":[175],"maps,":[177],"up":[179],"<tex":[181],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[182],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$2.5":[183],"x$</tex>":[184],"improvement":[185],"relative":[186],"TBB":[188],"Abseil.":[190],"These":[191,260],"gains":[192],"are":[193,230],"consistently":[194],"observed":[195],"both":[197],"32-bit":[198],"64-bit":[200],"floating-point":[201],"types.":[203],"The":[204],"results":[205],"confirm":[206],"viability":[208],"AVX-512-centric":[210],"designs":[211],"cost-effective":[214],"alternative":[215],"parallelism,":[218],"particularly":[219],"environments":[221],"where":[222],"minimizing":[223],"overhead":[225],"ensuring":[227],"deterministic":[228],"execution":[229],"critical.":[231],"findings":[233,261],"suggest":[234,262],"paradigm":[237],"shift":[238],"CPU":[240],"system":[242],"architecture,":[243],"emphasizing":[244],"wider":[245],"units":[247],"improved":[249],"bandwidth":[251],"utilization":[252],"primary":[254],"levers":[255],"scalable":[257],"high-performance":[258],"computing.":[259],"future":[264],"extensions":[265],"capabilities,":[268],"non-blocking":[271],"loads,":[273],"expanded":[274],"registers,":[276],"asynchronous":[278],"prefetching,":[279],"could":[280],"enhance":[281],"efficiency":[283],"data-intensive":[285],"workloads.":[286]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
