{"id":"https://openalex.org/W3167411928","doi":"https://doi.org/10.1145/3505287","title":"Bandwidth-Optimal Random Shuffling for GPUs","display_name":"Bandwidth-Optimal Random Shuffling for GPUs","publication_year":2022,"publication_date":"2022-01-31","ids":{"openalex":"https://openalex.org/W3167411928","doi":"https://doi.org/10.1145/3505287","mag":"3167411928"},"language":"en","primary_location":{"id":"doi:10.1145/3505287","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3505287","pdf_url":null,"source":{"id":"https://openalex.org/S2483380313","display_name":"ACM Transactions on Parallel Computing","issn_l":"2329-4949","issn":["2329-4949","2329-4957"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Parallel Computing","raw_type":"journal-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2106.06161","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5012569786","display_name":"Rory Mitchell","orcid":"https://orcid.org/0000-0003-2892-1082"},"institutions":[{"id":"https://openalex.org/I52179390","display_name":"University of Waikato","ror":"https://ror.org/013fsnh78","country_code":"NZ","type":"education","lineage":["https://openalex.org/I52179390"]}],"countries":["NZ"],"is_corresponding":true,"raw_author_name":"Rory Mitchell","raw_affiliation_strings":["Nvidia and Waikato University, Hamilton, New Zealand"],"raw_orcid":"https://orcid.org/0000-0003-2892-1082","affiliations":[{"raw_affiliation_string":"Nvidia and Waikato University, Hamilton, New Zealand","institution_ids":["https://openalex.org/I52179390"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017938415","display_name":"Daniel Stokes","orcid":"https://orcid.org/0000-0002-6995-3307"},"institutions":[{"id":"https://openalex.org/I52179390","display_name":"University of Waikato","ror":"https://ror.org/013fsnh78","country_code":"NZ","type":"education","lineage":["https://openalex.org/I52179390"]}],"countries":["NZ"],"is_corresponding":false,"raw_author_name":"Daniel Stokes","raw_affiliation_strings":["Waikato University and Nyriad Ltd., Hamilton, New Zealand"],"raw_orcid":"https://orcid.org/0000-0002-6995-3307","affiliations":[{"raw_affiliation_string":"Waikato University and Nyriad Ltd., Hamilton, New Zealand","institution_ids":["https://openalex.org/I52179390"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059992863","display_name":"Eibe Frank","orcid":"https://orcid.org/0000-0001-6152-7111"},"institutions":[{"id":"https://openalex.org/I52179390","display_name":"University of Waikato","ror":"https://ror.org/013fsnh78","country_code":"NZ","type":"education","lineage":["https://openalex.org/I52179390"]}],"countries":["NZ"],"is_corresponding":false,"raw_author_name":"Eibe Frank","raw_affiliation_strings":["Waikato University, Hamilton, New Zealand"],"raw_orcid":"https://orcid.org/0000-0001-6152-7111","affiliations":[{"raw_affiliation_string":"Waikato University, Hamilton, New Zealand","institution_ids":["https://openalex.org/I52179390"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063502807","display_name":"Geoffrey Holmes","orcid":"https://orcid.org/0000-0003-0433-8925"},"institutions":[{"id":"https://openalex.org/I52179390","display_name":"University of Waikato","ror":"https://ror.org/013fsnh78","country_code":"NZ","type":"education","lineage":["https://openalex.org/I52179390"]}],"countries":["NZ"],"is_corresponding":false,"raw_author_name":"Geoffrey Holmes","raw_affiliation_strings":["Waikato University, Hamilton, New Zealand"],"raw_orcid":"https://orcid.org/0000-0003-0433-8925","affiliations":[{"raw_affiliation_string":"Waikato University, Hamilton, New Zealand","institution_ids":["https://openalex.org/I52179390"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5012569786"],"corresponding_institution_ids":["https://openalex.org/I52179390"],"apc_list":null,"apc_paid":null,"fwci":0.1387,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.50868256,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"9","issue":"1","first_page":"1","last_page":"20"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12326","display_name":"Network Packet Processing and Optimization","score":0.9943000078201294,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/shuffling","display_name":"Shuffling","score":0.8264905214309692},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8186969757080078},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7371155023574829},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.4693528115749359},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4530092179775238},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.43117761611938477}],"concepts":[{"id":"https://openalex.org/C167927819","wikidata":"https://www.wikidata.org/wiki/Q1930567","display_name":"Shuffling","level":2,"score":0.8264905214309692},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8186969757080078},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7371155023574829},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.4693528115749359},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4530092179775238},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.43117761611938477},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1145/3505287","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3505287","pdf_url":null,"source":{"id":"https://openalex.org/S2483380313","display_name":"ACM Transactions on Parallel Computing","issn_l":"2329-4949","issn":["2329-4949","2329-4957"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Parallel Computing","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2106.06161","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2106.06161","pdf_url":"https://arxiv.org/pdf/2106.06161","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"pmh:oai:researchcommons.waikato.ac.nz:10289/16345","is_oa":true,"landing_page_url":"https://hdl.handle.net/10289/16345","pdf_url":"https://researchcommons.waikato.ac.nz/bitstreams/970bb2f7-6418-494c-9e51-19099d3e1581/download","source":{"id":"https://openalex.org/S4306400944","display_name":"Research Commons (University of Waikato)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I52179390","host_organization_name":"University of Waikato","host_organization_lineage":["https://openalex.org/I52179390"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Journal Article"},{"id":"mag:3167411928","is_oa":true,"landing_page_url":"http://export.arxiv.org/pdf/2106.06161","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2106.06161","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2106.06161","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2106.06161","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2106.06161","pdf_url":"https://arxiv.org/pdf/2106.06161","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":60,"referenced_works":["https://openalex.org/W36167541","https://openalex.org/W40887843","https://openalex.org/W89768008","https://openalex.org/W637655573","https://openalex.org/W1480958225","https://openalex.org/W1536852470","https://openalex.org/W1551639328","https://openalex.org/W1569736468","https://openalex.org/W1595783387","https://openalex.org/W1596709027","https://openalex.org/W1667652561","https://openalex.org/W1784406605","https://openalex.org/W1863668774","https://openalex.org/W1905750377","https://openalex.org/W1963898916","https://openalex.org/W1968220892","https://openalex.org/W1981177867","https://openalex.org/W1981663184","https://openalex.org/W2013444956","https://openalex.org/W2016677154","https://openalex.org/W2023518480","https://openalex.org/W2025440555","https://openalex.org/W2049742196","https://openalex.org/W2054215380","https://openalex.org/W2058402896","https://openalex.org/W2059966434","https://openalex.org/W2060690113","https://openalex.org/W2070262346","https://openalex.org/W2074886859","https://openalex.org/W2075075402","https://openalex.org/W2077300005","https://openalex.org/W2082241581","https://openalex.org/W2082695854","https://openalex.org/W2095595785","https://openalex.org/W2101111919","https://openalex.org/W2120490947","https://openalex.org/W2122817518","https://openalex.org/W2123674036","https://openalex.org/W2139774022","https://openalex.org/W2143969840","https://openalex.org/W2149410722","https://openalex.org/W2157801062","https://openalex.org/W2160837696","https://openalex.org/W2161061943","https://openalex.org/W2162390675","https://openalex.org/W2212660284","https://openalex.org/W2240426913","https://openalex.org/W2346052963","https://openalex.org/W2587429114","https://openalex.org/W2604808181","https://openalex.org/W2734491470","https://openalex.org/W2796221598","https://openalex.org/W2904583894","https://openalex.org/W2962983095","https://openalex.org/W3137786818","https://openalex.org/W3157730516","https://openalex.org/W4210402647","https://openalex.org/W4221099263","https://openalex.org/W4233413206","https://openalex.org/W6683923952"],"related_works":["https://openalex.org/W2965018236","https://openalex.org/W2028158226","https://openalex.org/W2922815438","https://openalex.org/W2751568363","https://openalex.org/W2763158813","https://openalex.org/W3105209461","https://openalex.org/W3180049492","https://openalex.org/W2327546063","https://openalex.org/W2086337948","https://openalex.org/W2089353209","https://openalex.org/W2787748416","https://openalex.org/W3183439381","https://openalex.org/W2740256441","https://openalex.org/W1857322307","https://openalex.org/W2339815978","https://openalex.org/W2885459432","https://openalex.org/W2946862025","https://openalex.org/W3135988214","https://openalex.org/W2147930494","https://openalex.org/W2056865774"],"abstract_inverted_index":{"Linear-time":[0],"algorithms":[1,34,150],"that":[2,143],"are":[3,17,35],"traditionally":[4],"used":[5],"to":[6,21,26,49],"shuffle":[7,107,146],"data":[8],"on":[9,23,136,151],"CPUs,":[10],"such":[11],"as":[12],"the":[13,119,122,130,144],"method":[14,60],"of":[15,46,61,112,121,132,155,161],"Fisher-Yates,":[16],"not":[18],"well":[19],"suited":[20],"implementation":[22],"GPUs":[24],"due":[25],"inherent":[27],"sequential":[28],"dependencies,":[29],"and":[30,96,104,158,163],"existing":[31],"parallel":[32,66],"shuffling":[33],"unsuitable":[36],"for":[37,87,129],"GPU":[38],"architectures":[39],"because":[40],"they":[41],"incur":[42],"a":[43,59,99,126],"large":[44],"number":[45],"read/write":[47],"operations":[48,86],"high":[50],"latency":[51],"global":[52,89,101,113],"memory.":[53],"To":[54,116],"address":[55],"this,":[56],"we":[57,124],"provide":[58],"generating":[62],"pseudo-random":[63,70,133],"permutations":[64,134],"in":[65],"by":[67],"fusing":[68],"suitable":[69],"bijective":[71,145],"functions":[72],"with":[73],"stream":[74],"compaction":[75],"operations.":[76],"Our":[77],"algorithm,":[78,123],"termed":[79],"\u201cbijective":[80],"shuffle\u201d":[81],"trades":[82],"increased":[83],"per-thread":[84],"arithmetic":[85],"reduced":[88],"memory":[90,102,114],"transactions.":[91],"It":[92],"is":[93],"work-efficient,":[94],"deterministic,":[95],"only":[97],"requires":[98],"single":[100],"read":[103],"write":[105],"per":[106],"input,":[108],"thus":[109],"maximising":[110],"use":[111],"bandwidth.":[115,167],"empirically":[117],"demonstrate":[118],"correctness":[120],"develop":[125],"statistical":[127],"test":[128],"quality":[131],"based":[135],"kernel":[137],"space":[138],"embeddings.":[139],"Experimental":[140],"results":[141],"show":[142],"algorithm":[147],"outperforms":[148],"competing":[149],"GPUs,":[152],"showing":[153],"improvements":[154],"between":[156],"one":[157],"two":[159],"orders":[160],"magnitude":[162],"approaching":[164],"peak":[165],"device":[166]},"counts_by_year":[{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
