{"id":"https://openalex.org/W7152081560","doi":"https://doi.org/10.48550/arxiv.2604.05885","title":"JZ-Tree: GPU friendly neighbour search and friends-of-friends with dual tree walks in JAX plus CUDA","display_name":"JZ-Tree: GPU friendly neighbour search and friends-of-friends with dual tree walks in JAX plus CUDA","publication_year":2026,"publication_date":"2026-04-07","ids":{"openalex":"https://openalex.org/W7152081560","doi":"https://doi.org/10.48550/arxiv.2604.05885"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.05885","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.05885","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.05885","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5015024405","display_name":"Jens St\u00fccker","orcid":"https://orcid.org/0000-0003-1258-1466"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"St\u00fccker, Jens","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057016631","display_name":"Oliver Hahn","orcid":"https://orcid.org/0000-0003-1677-8696"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hahn, Oliver","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133162476","display_name":"Lukas Winkler","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Winkler, Lukas","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029500296","display_name":"A. G. Adame","orcid":"https://orcid.org/0009-0005-0594-9391"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Adame, Adrian Gutierrez","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5068207428","display_name":"Thomas Fl\u00f6ss","orcid":"https://orcid.org/0000-0002-8245-780X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fl\u00f6ss, Thomas","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.37279999256134033,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.37279999256134033,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10637","display_name":"Advanced Clustering Algorithms Research","score":0.1453000009059906,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.05739999935030937,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/tree-traversal","display_name":"Tree traversal","score":0.8819000124931335},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.7502999901771545},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.6449999809265137},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.6004999876022339},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.5151000022888184},{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.4772000014781952},{"id":"https://openalex.org/keywords/multithreading","display_name":"Multithreading","score":0.44620001316070557},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.39649999141693115}],"concepts":[{"id":"https://openalex.org/C140745168","wikidata":"https://www.wikidata.org/wiki/Q1210082","display_name":"Tree traversal","level":2,"score":0.8819000124931335},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8608999848365784},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.7502999901771545},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7408000230789185},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.6449999809265137},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.6004999876022339},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.5151000022888184},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.4772000014781952},{"id":"https://openalex.org/C201410400","wikidata":"https://www.wikidata.org/wiki/Q1064412","display_name":"Multithreading","level":3,"score":0.44620001316070557},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.4147999882698059},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.39649999141693115},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.358599990606308},{"id":"https://openalex.org/C96333769","wikidata":"https://www.wikidata.org/wiki/Q907955","display_name":"Graph traversal","level":3,"score":0.35359999537467957},{"id":"https://openalex.org/C42812","wikidata":"https://www.wikidata.org/wiki/Q1082910","display_name":"Partition (number theory)","level":2,"score":0.3353999853134155},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.32409998774528503},{"id":"https://openalex.org/C106278948","wikidata":"https://www.wikidata.org/wiki/Q1198051","display_name":"R-tree","level":4,"score":0.3188999891281128},{"id":"https://openalex.org/C207024777","wikidata":"https://www.wikidata.org/wiki/Q621673","display_name":"Search tree","level":3,"score":0.29409998655319214},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.2937999963760376},{"id":"https://openalex.org/C133875982","wikidata":"https://www.wikidata.org/wiki/Q764810","display_name":"Shared memory","level":2,"score":0.29089999198913574},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.2825999855995178},{"id":"https://openalex.org/C120373497","wikidata":"https://www.wikidata.org/wiki/Q1087987","display_name":"Parallel algorithm","level":2,"score":0.28029999136924744},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.2712000012397766},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.27059999108314514},{"id":"https://openalex.org/C151201525","wikidata":"https://www.wikidata.org/wiki/Q177239","display_name":"Limit (mathematics)","level":2,"score":0.2678999900817871},{"id":"https://openalex.org/C206175624","wikidata":"https://www.wikidata.org/wiki/Q595731","display_name":"Branching (polymer chemistry)","level":2,"score":0.267300009727478},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.26089999079704285},{"id":"https://openalex.org/C2780870223","wikidata":"https://www.wikidata.org/wiki/Q1004415","display_name":"Runtime system","level":2,"score":0.25600001215934753}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.05885","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.05885","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.05885","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.05885","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Algorithms":[0],"based":[1],"on":[2,117],"spatial":[3,126],"tree":[4,55,85],"traversal":[5,102],"are":[6],"widely":[7],"regarded":[8],"as":[9,180],"among":[10],"the":[11,44,149],"most":[12],"efficient":[13,100,184],"and":[14,61,133],"flexible":[15],"approaches":[16],"for":[17,91,154,183],"many":[18],"problems":[19],"in":[20,41],"CPU-based":[21],"high-performance":[22],"computing":[23],"(HPC).":[24],"However,":[25],"directly":[26],"transferring":[27],"these":[28,77],"algorithms":[29,56,127],"to":[30,58,110,165],"GPU":[31,73,92,152,185],"architectures":[32],"often":[33],"yields":[34],"substantially":[35],"smaller":[36],"performance":[37,146],"gains":[38],"than":[39,143],"expected":[40],"light":[42],"of":[43,48,54,68,123,187,191],"high":[45],"computational":[46],"throughput":[47],"modern":[49],"GPUs.":[50],"The":[51,94],"branching":[52],"nature":[53],"leads":[57],"thread":[59,107],"divergence":[60],"irregular":[62],"memory":[63,113],"access":[64,114],"patterns":[65],"--":[66,128],"both":[67,138],"which":[69,178],"may":[70],"severely":[71],"limit":[72],"performance.":[74],"To":[75],"address":[76],"challenges,":[78],"we":[79,120,140],"propose":[80],"a":[81,181,188],"Morton":[82],"(z-order)":[83],"'plane-based":[84],"hierarchy'":[86],"that":[87],"is":[88],"specifically":[89],"designed":[90],"architectures.":[93],"resulting":[95],"flattened":[96],"data":[97],"layout":[98],"enables":[99],"dual-tree":[101],"with":[103,162],"collaborative":[104],"execution":[105],"across":[106],"groups,":[108],"leading":[109],"highly":[111],"coalesced":[112],"patterns.":[115],"Based":[116],"this":[118],"framework":[119],"present":[121],"implementations":[122,186],"two":[124],"important":[125],"exact":[129],"$k$-nearest":[130],"neighbour":[131],"search":[132],"friends-of-friends":[134],"(FoF)":[135],"clustering.":[136],"For":[137],"cases,":[139],"observe":[141],"more":[142],"an":[144,171],"order-of-magnitude":[145],"improvement":[147],"over":[148],"closest":[150],"competing":[151],"libraries":[153],"large":[155],"problem":[156],"sizes":[157],"($N":[158],"\\gtrsim":[159],"10^7$),":[160],"together":[161],"strong":[163],"scaling":[164],"distributed":[166],"multi-GPU":[167],"systems.":[168],"We":[169],"provide":[170],"open-source":[172],"implementation,":[173],"'JZ-Tree'":[174],"(JAX":[175],"z-order":[176],"tree),":[177],"serves":[179],"foundation":[182],"broad":[189],"class":[190],"tree-based":[192],"algorithms.":[193]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-09T00:00:00"}
