{"id":"https://openalex.org/W4388031374","doi":"https://doi.org/10.1145/3581784.3607062","title":"Parallel Top-K Algorithms on GPU: A Comprehensive Study and New Methods","display_name":"Parallel Top-K Algorithms on GPU: A Comprehensive Study and New Methods","publication_year":2023,"publication_date":"2023-10-30","ids":{"openalex":"https://openalex.org/W4388031374","doi":"https://doi.org/10.1145/3581784.3607062"},"language":"en","primary_location":{"id":"doi:10.1145/3581784.3607062","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3581784.3607062","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3581784.3607062","source":null,"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3581784.3607062","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075095992","display_name":"Jingrong Zhang","orcid":"https://orcid.org/0000-0002-7829-7348"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Jingrong Zhang","raw_affiliation_strings":["NVIDIA, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"NVIDIA, Shanghai, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050692022","display_name":"Akira Naruse","orcid":"https://orcid.org/0000-0002-3140-0854"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Akira Naruse","raw_affiliation_strings":["NVIDIA, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"NVIDIA, Tokyo, Japan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009047207","display_name":"Xipeng Li","orcid":"https://orcid.org/0009-0009-9606-1599"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xipeng Li","raw_affiliation_strings":["NVIDIA, Beijing, China"],"affiliations":[{"raw_affiliation_string":"NVIDIA, Beijing, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101762937","display_name":"Yong Wang","orcid":"https://orcid.org/0009-0005-0906-8778"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yong Wang","raw_affiliation_strings":["NVIDIA, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"NVIDIA, Shanghai, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5075095992"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.714,"has_fulltext":true,"cited_by_count":14,"citation_normalized_percentile":{"value":0.86929368,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"13"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11321","display_name":"Error Correcting Code Techniques","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.9514505863189697},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8326869010925293},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7246891856193542},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.49448418617248535},{"id":"https://openalex.org/keywords/queue","display_name":"Queue","score":0.4712265729904175}],"concepts":[{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.9514505863189697},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8326869010925293},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7246891856193542},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.49448418617248535},{"id":"https://openalex.org/C160403385","wikidata":"https://www.wikidata.org/wiki/Q220543","display_name":"Queue","level":2,"score":0.4712265729904175},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3581784.3607062","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3581784.3607062","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3581784.3607062","source":null,"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3581784.3607062","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3581784.3607062","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3581784.3607062","source":null,"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4388031374.pdf","grobid_xml":"https://content.openalex.org/works/W4388031374.grobid-xml"},"referenced_works_count":18,"referenced_works":["https://openalex.org/W1480958225","https://openalex.org/W1506582611","https://openalex.org/W2058244017","https://openalex.org/W2080928154","https://openalex.org/W2124509324","https://openalex.org/W2140523364","https://openalex.org/W2735481939","https://openalex.org/W2747329762","https://openalex.org/W2798445803","https://openalex.org/W2900229157","https://openalex.org/W2949985202","https://openalex.org/W2951465128","https://openalex.org/W2984796925","https://openalex.org/W2998702515","https://openalex.org/W3003611599","https://openalex.org/W3040206323","https://openalex.org/W3093933627","https://openalex.org/W4285582629"],"related_works":["https://openalex.org/W2317245370","https://openalex.org/W1980160788","https://openalex.org/W198851386","https://openalex.org/W2030310580","https://openalex.org/W947442053","https://openalex.org/W2148915962","https://openalex.org/W4313433561","https://openalex.org/W2283866686","https://openalex.org/W4287182096","https://openalex.org/W27867058"],"abstract_inverted_index":{"The":[0],"top-K":[1,24,32,130],"problem":[2,108],"is":[3],"an":[4,48],"essential":[5],"part":[6],"of":[7,90,107],"many":[8],"important":[9],"applications":[10],"in":[11],"scientific":[12],"computing,":[13],"information":[14],"retrieval,":[15],"etc.":[16],"As":[17],"data":[18,57,70,75,111],"volume":[19],"grows":[20],"rapidly,":[21],"high-performance":[22],"parallel":[23,31,83],"algorithms":[25],"become":[26],"critical.":[27],"We":[28,93],"propose":[29],"two":[30],"algorithms,":[33],"AIR":[34,45,120],"Top-K":[35,46,121],"(Adaptive":[36],"and":[37,41,55,82,100,110,117,124,132,134],"Iteration-fused":[38],"Radix":[39],"Top-K)":[40],"GridSelect,":[42],"for":[43,103],"GPU.":[44],"employs":[47],"iteration-fused":[49],"design":[50],"to":[51,86,143],"minimize":[52],"CPU-GPU":[53],"communication":[54],"device":[56,64],"access.":[58],"Its":[59],"adaptive":[60],"strategy":[61],"eliminates":[62],"unnecessary":[63],"memory":[65],"traffic":[66],"automatically":[67],"under":[68],"various":[69],"distributions.":[71,112],"GridSelect":[72,140],"can":[73],"process":[74],"on-the-fly.":[76],"It":[77],"adopts":[78],"a":[79,104],"shared":[80],"queue":[81],"two-step":[84],"insertion":[85],"decrease":[87],"the":[88],"frequency":[89],"costly":[91],"operations.":[92],"comprehensively":[94],"compare":[95],"8":[96],"open-source":[97],"GPU":[98],"implementations":[99],"our":[101],"methods":[102],"wide":[105],"range":[106],"sizes":[109,115],"For":[113],"batch":[114],"1":[116],"100,":[118],"respectively,":[119],"shows":[122,141],"1.98--21.48\u00d7":[123],"8.01--574.78\u00d7":[125],"speedup":[126,136,145],"over":[127,137,146],"previous":[128],"radix":[129],"algorithm,":[131],"1.44--7.34\u00d7":[133],"1.38--31.91\u00d7":[135],"state-of-the-art":[138],"methods.":[139],"up":[142],"882.29\u00d7":[144],"its":[147],"baseline.":[148]},"counts_by_year":[{"year":2025,"cited_by_count":11},{"year":2024,"cited_by_count":3}],"updated_date":"2026-03-12T08:34:05.389933","created_date":"2025-10-10T00:00:00"}
