{"id":"https://openalex.org/W3117273852","doi":"https://doi.org/10.1145/3429981","title":"Efficient Nearest-Neighbor Data Sharing in GPUs","display_name":"Efficient Nearest-Neighbor Data Sharing in GPUs","publication_year":2020,"publication_date":"2020-12-30","ids":{"openalex":"https://openalex.org/W3117273852","doi":"https://doi.org/10.1145/3429981","mag":"3117273852"},"language":"en","primary_location":{"id":"doi:10.1145/3429981","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3429981","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3429981","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3429981","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5053374807","display_name":"Negin Mahani","orcid":"https://orcid.org/0000-0001-5232-3539"},"institutions":[{"id":"https://openalex.org/I133529467","display_name":"Sharif University of Technology","ror":"https://ror.org/024c2fq17","country_code":"IR","type":"education","lineage":["https://openalex.org/I133529467"]}],"countries":["IR"],"is_corresponding":true,"raw_author_name":"Negin Nematollahi","raw_affiliation_strings":["Department of Computer Engineering, Sharif University of Technology, Iran"],"affiliations":[{"raw_affiliation_string":"Department of Computer Engineering, Sharif University of Technology, Iran","institution_ids":["https://openalex.org/I133529467"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008240365","display_name":"Mohammad Sadrosadati","orcid":"https://orcid.org/0000-0002-4029-0175"},"institutions":[{"id":"https://openalex.org/I4210146419","display_name":"Institute for Research in Fundamental Sciences","ror":"https://ror.org/04xreqs31","country_code":"IR","type":"facility","lineage":["https://openalex.org/I4210146419"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"Mohammad Sadrosadati","raw_affiliation_strings":["School of Computer Science, Institute for Research in Fundamental Sciences (IPM), Iran"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Institute for Research in Fundamental Sciences (IPM), Iran","institution_ids":["https://openalex.org/I4210146419"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019048190","display_name":"Hajar Falahati","orcid":"https://orcid.org/0000-0001-8375-3339"},"institutions":[{"id":"https://openalex.org/I4210146419","display_name":"Institute for Research in Fundamental Sciences","ror":"https://ror.org/04xreqs31","country_code":"IR","type":"facility","lineage":["https://openalex.org/I4210146419"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"Hajar Falahati","raw_affiliation_strings":["School of Computer Science, Institute for Research in Fundamental Sciences (IPM), Iran"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Institute for Research in Fundamental Sciences (IPM), Iran","institution_ids":["https://openalex.org/I4210146419"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089451509","display_name":"Marzieh Barkhordar","orcid":null},"institutions":[{"id":"https://openalex.org/I133529467","display_name":"Sharif University of Technology","ror":"https://ror.org/024c2fq17","country_code":"IR","type":"education","lineage":["https://openalex.org/I133529467"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"Marzieh Barkhordar","raw_affiliation_strings":["Department of Computer Engineering, Sharif University of Technology, Iran"],"affiliations":[{"raw_affiliation_string":"Department of Computer Engineering, Sharif University of Technology, Iran","institution_ids":["https://openalex.org/I133529467"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089926912","display_name":"Mario Drumond","orcid":"https://orcid.org/0000-0002-1981-3525"},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Mario Paulo Drumond","raw_affiliation_strings":["EPFL University, Switzerland"],"affiliations":[{"raw_affiliation_string":"EPFL University, Switzerland","institution_ids":["https://openalex.org/I5124864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040238844","display_name":"Hamid Sarbazi\u2010Azad","orcid":"https://orcid.org/0000-0003-4079-8603"},"institutions":[{"id":"https://openalex.org/I133529467","display_name":"Sharif University of Technology","ror":"https://ror.org/024c2fq17","country_code":"IR","type":"education","lineage":["https://openalex.org/I133529467"]},{"id":"https://openalex.org/I4210146419","display_name":"Institute for Research in Fundamental Sciences","ror":"https://ror.org/04xreqs31","country_code":"IR","type":"facility","lineage":["https://openalex.org/I4210146419"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"Hamid Sarbazi-Azad","raw_affiliation_strings":["Department of Computer Engineering, Sharif University of Technology, Iran and School of Computer Science, Institute for Research in Fundamental Sciences (IPM), Iran"],"affiliations":[{"raw_affiliation_string":"Department of Computer Engineering, Sharif University of Technology, Iran and School of Computer Science, Institute for Research in Fundamental Sciences (IPM), Iran","institution_ids":["https://openalex.org/I4210146419","https://openalex.org/I133529467"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057697787","display_name":"Babak Falsafi","orcid":"https://orcid.org/0000-0001-5916-8068"},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Babak Falsafi","raw_affiliation_strings":["EPFL University, Switzerland"],"affiliations":[{"raw_affiliation_string":"EPFL University, Switzerland","institution_ids":["https://openalex.org/I5124864"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5053374807"],"corresponding_institution_ids":["https://openalex.org/I133529467"],"apc_list":null,"apc_paid":null,"fwci":0.6473,"has_fulltext":true,"cited_by_count":11,"citation_normalized_percentile":{"value":0.73537257,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"18","issue":"1","first_page":"1","last_page":"26"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8597202897071838},{"id":"https://openalex.org/keywords/stencil","display_name":"Stencil","score":0.7394242286682129},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.694831907749176},{"id":"https://openalex.org/keywords/k-nearest-neighbors-algorithm","display_name":"k-nearest neighbors algorithm","score":0.6441197395324707},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.5292234420776367},{"id":"https://openalex.org/keywords/graphics-processing-unit","display_name":"Graphics processing unit","score":0.4800209105014801},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.472013920545578},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.4647703766822815},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.455564022064209},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.4338683485984802},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.4104781746864319},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.2999061346054077},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.10455399751663208},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.09764683246612549},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.09535011649131775}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8597202897071838},{"id":"https://openalex.org/C76752949","wikidata":"https://www.wikidata.org/wiki/Q7607499","display_name":"Stencil","level":2,"score":0.7394242286682129},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.694831907749176},{"id":"https://openalex.org/C113238511","wikidata":"https://www.wikidata.org/wiki/Q1071612","display_name":"k-nearest neighbors algorithm","level":2,"score":0.6441197395324707},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.5292234420776367},{"id":"https://openalex.org/C2779851693","wikidata":"https://www.wikidata.org/wiki/Q183484","display_name":"Graphics processing unit","level":2,"score":0.4800209105014801},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.472013920545578},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.4647703766822815},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.455564022064209},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.4338683485984802},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.4104781746864319},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.2999061346054077},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.10455399751663208},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.09764683246612549},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.09535011649131775},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3429981","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3429981","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3429981","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3429981","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3429981","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3429981","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","score":0.4099999964237213,"display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3117273852.pdf","grobid_xml":"https://content.openalex.org/works/W3117273852.grobid-xml"},"referenced_works_count":69,"referenced_works":["https://openalex.org/W799594534","https://openalex.org/W1974967412","https://openalex.org/W1979660638","https://openalex.org/W1980872666","https://openalex.org/W1982996921","https://openalex.org/W2017914730","https://openalex.org/W2028798345","https://openalex.org/W2074326248","https://openalex.org/W2077319512","https://openalex.org/W2079248286","https://openalex.org/W2080592089","https://openalex.org/W2082229508","https://openalex.org/W2090584832","https://openalex.org/W2091518118","https://openalex.org/W2091670975","https://openalex.org/W2096661534","https://openalex.org/W2124542970","https://openalex.org/W2135682468","https://openalex.org/W2149234156","https://openalex.org/W2163687928","https://openalex.org/W2166338303","https://openalex.org/W2232645663","https://openalex.org/W2418290227","https://openalex.org/W2492295191","https://openalex.org/W2516627871","https://openalex.org/W2554192763","https://openalex.org/W2562227272","https://openalex.org/W2562896936","https://openalex.org/W2566040696","https://openalex.org/W2567071743","https://openalex.org/W2567655207","https://openalex.org/W2605751925","https://openalex.org/W2613509467","https://openalex.org/W2730499906","https://openalex.org/W2735974062","https://openalex.org/W2763538132","https://openalex.org/W2763549657","https://openalex.org/W2769311538","https://openalex.org/W2785874054","https://openalex.org/W2789977517","https://openalex.org/W2793895903","https://openalex.org/W2883882491","https://openalex.org/W2884590322","https://openalex.org/W2886858179","https://openalex.org/W2889543163","https://openalex.org/W2895553128","https://openalex.org/W2903775786","https://openalex.org/W2909218611","https://openalex.org/W2910737925","https://openalex.org/W2917720315","https://openalex.org/W2929410821","https://openalex.org/W2943018941","https://openalex.org/W2964177126","https://openalex.org/W2984139344","https://openalex.org/W3100822741","https://openalex.org/W3125028070","https://openalex.org/W3128982134","https://openalex.org/W3144430200","https://openalex.org/W4214549590","https://openalex.org/W4229654654","https://openalex.org/W4229779967","https://openalex.org/W4231970275","https://openalex.org/W4234833047","https://openalex.org/W4242576920","https://openalex.org/W4244089596","https://openalex.org/W4245236049","https://openalex.org/W4255138103","https://openalex.org/W4255681033","https://openalex.org/W4300129074"],"related_works":["https://openalex.org/W2794923745","https://openalex.org/W2075046026","https://openalex.org/W2320652536","https://openalex.org/W2340937903","https://openalex.org/W2119534391","https://openalex.org/W2128766769","https://openalex.org/W2146871484","https://openalex.org/W1594311701","https://openalex.org/W4301952189","https://openalex.org/W3117273852"],"abstract_inverted_index":{"Stencil":[0,16],"codes":[1,17,58,195],"(a.k.a.":[2],"nearest-neighbor":[3,19,48,67,103,122,138,221],"computations)":[4],"are":[5,127],"widely":[6],"used":[7],"in":[8,28,90,112,164,196],"image":[9],"processing,":[10],"machine":[11],"learning,":[12],"and":[13,40,78,81,129,161,168,186],"scientific":[14],"applications.":[15],"incur":[18],"data":[20,64,104,133,222],"exchange":[21,134],"because":[22],"the":[23,29,41,88,155],"value":[24,39],"of":[25,37,43,46,63,174,184,189,213],"each":[26,113],"point":[27],"structured":[30],"grid":[31],"is":[32,70,142,210],"calculated":[33],"as":[34,131,152],"a":[35,44,60,101,132,170],"function":[36],"its":[38,47],"values":[42],"subset":[45],"points.":[49],"When":[50],"running":[51],"on":[52,157],"Graphics":[53,198],"Processing":[54,199],"Unit":[55,200],"(GPUs),":[56],"stencil":[57,194],"exhibit":[59],"high":[61],"degree":[62],"sharing":[65,105],"between":[66,148],"threads.":[68],"Sharing":[69],"typically":[71],"implemented":[72],"through":[73],"shared":[74,139],"memories,":[75],"shuffle":[76],"instructions,":[77],"on-chip":[79],"caches":[80],"often":[82],"incurs":[83],"performance":[84,182,209],"overheads":[85],"due":[86],"to":[87,136,153,191],"redundancy":[89],"memory":[91],"accesses.":[92,140],"In":[93],"this":[94],"article,":[95],"we":[96],"propose":[97],"Neighbor":[98],"Data":[99],"(NeDa),":[100],"direct":[102],"mechanism":[106,135],"that":[107,117,207],"uses":[108],"two":[109],"registers":[110,126],"embedded":[111,143],"streaming":[114],"processor":[115],"(SP)":[116],"can":[118],"be":[119],"accessed":[120],"by":[121],"SP":[123,149],"cores.":[124],"The":[125,176],"compiler-allocated":[128],"serve":[130],"eliminate":[137],"NeDa":[141,163],"carefully":[144],"with":[145,217],"local":[146],"wires":[147],"cores":[150],"so":[151],"minimize":[154],"impact":[156],"density.":[158],"We":[159,205],"place":[160],"route":[162],"an":[165,180,214],"open-source":[166],"GPU":[167,216],"show":[169,206],"small":[171],"area":[172],"overhead":[173,219],"1.3%.":[175],"cycle-accurate":[177],"simulation":[178],"indicates":[179],"average":[181],"improvement":[183],"21.8%":[185],"power":[187],"reduction":[188],"up":[190],"18.3%":[192],"for":[193,220],"General-Purpose":[197],"(GPGPU)":[201],"standard":[202],"benchmark":[203],"suites.":[204],"NeDa\u2019s":[208],"within":[211],"13.2%":[212],"ideal":[215],"no":[218],"exchange.":[223]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":3}],"updated_date":"2026-04-17T18:11:37.981687","created_date":"2025-10-10T00:00:00"}
