{"id":"https://openalex.org/W4311546748","doi":"https://doi.org/10.1145/3547276.3548627","title":"A Study on Atomics-based Integer Sum Reduction in HIP on AMD GPU","display_name":"A Study on Atomics-based Integer Sum Reduction in HIP on AMD GPU","publication_year":2022,"publication_date":"2022-08-29","ids":{"openalex":"https://openalex.org/W4311546748","doi":"https://doi.org/10.1145/3547276.3548627"},"language":"en","primary_location":{"id":"doi:10.1145/3547276.3548627","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3547276.3548627","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Workshop Proceedings of the 51st International Conference on Parallel Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://www.osti.gov/biblio/1902809","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101790023","display_name":"Zheming Jin","orcid":"https://orcid.org/0000-0002-7197-780X"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zheming Jin","raw_affiliation_strings":["Oak Ridge National Laboratory, United States of America"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory, United States of America","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061838490","display_name":"Jeffrey S. Vetter","orcid":"https://orcid.org/0000-0002-2449-6720"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jeffrey Vetter","raw_affiliation_strings":["Oak Ridge National Laboratory, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory, USA","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5061838490","display_name":"Jeffrey S. Vetter","orcid":"https://orcid.org/0000-0002-2449-6720"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jeffrey Vetter","raw_affiliation_strings":["Oak Ridge National Laboratory, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory, USA","institution_ids":["https://openalex.org/I1289243028"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.6998,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.68835125,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10682","display_name":"Quantum Computing Algorithms and Architecture","score":0.993399977684021,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9916999936103821,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8550302982330322},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6926029920578003},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.6002357006072998},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.5686050653457642},{"id":"https://openalex.org/keywords/synchronization","display_name":"Synchronization (alternating current)","score":0.5557634830474854},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.49214428663253784},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.2229795753955841}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8550302982330322},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6926029920578003},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.6002357006072998},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.5686050653457642},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.5557634830474854},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.49214428663253784},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.2229795753955841},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3547276.3548627","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3547276.3548627","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Workshop Proceedings of the 51st International Conference on Parallel Processing","raw_type":"proceedings-article"},{"id":"pmh:oai:osti.gov:1902809","is_oa":true,"landing_page_url":"https://www.osti.gov/biblio/1902809","pdf_url":null,"source":{"id":"https://openalex.org/S4306402487","display_name":"OSTI OAI (U.S. Department of Energy Office of Scientific and Technical Information)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I139351228","host_organization_name":"Office of Scientific and Technical Information","host_organization_lineage":["https://openalex.org/I139351228"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null}],"best_oa_location":{"id":"pmh:oai:osti.gov:1902809","is_oa":true,"landing_page_url":"https://www.osti.gov/biblio/1902809","pdf_url":null,"source":{"id":"https://openalex.org/S4306402487","display_name":"OSTI OAI (U.S. Department of Energy Office of Scientific and Technical Information)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I139351228","host_organization_name":"Office of Scientific and Technical Information","host_organization_lineage":["https://openalex.org/I139351228"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null},"sustainable_development_goals":[{"score":0.6899999976158142,"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W1480958225","https://openalex.org/W2016357834","https://openalex.org/W2086105136","https://openalex.org/W2088134616","https://openalex.org/W2399715892","https://openalex.org/W2748044793","https://openalex.org/W2776545425","https://openalex.org/W2791531244","https://openalex.org/W2954922220","https://openalex.org/W2962721408","https://openalex.org/W3022422003","https://openalex.org/W3137083963","https://openalex.org/W3163802921","https://openalex.org/W3203828302","https://openalex.org/W3209449653","https://openalex.org/W4235158490","https://openalex.org/W4250027548","https://openalex.org/W6748640332"],"related_works":["https://openalex.org/W3096456556","https://openalex.org/W4240253816","https://openalex.org/W2169584677","https://openalex.org/W2979513934","https://openalex.org/W4232954277","https://openalex.org/W2020341030","https://openalex.org/W2749133591","https://openalex.org/W2367473450","https://openalex.org/W23346600","https://openalex.org/W2460280200"],"abstract_inverted_index":{"Integer":[0],"sum":[1],"reduction":[2,15,65,95,147],"is":[3,115,157,221],"a":[4,13,17,32,35,67,80,116,143,146],"primitive":[5],"operation":[6],"commonly":[7],"used":[8],"in":[9,31,44,83],"scientific":[10],"computing.":[11],"Implementing":[12],"parallel":[14],"on":[16,79,203],"GPU":[18,205,219],"often":[19],"involves":[20],"concurrent":[21],"memory":[22,72,99,136,151],"accesses":[23,152],"using":[24,73],"atomic":[25,54,75,131],"operations":[26,55,132],"and":[27,64,104,125,153,210,213],"synchronization":[28,124],"of":[29,38,53,61,93,122,128,142,191,199,217],"work-items":[30],"work-group.":[33,144],"For":[34],"better":[36],"understanding":[37],"these":[39],"operations,":[40],"we":[41,89,138],"redesigned":[42],"micro-kernels":[43],"the":[45,51,59,91,94,120,126,140,162,167,171,176,181,189,196,200,204,214],"HIP":[46],"programming":[47],"language":[48],"to":[49,69,179],"measure":[50],"time":[52],"over":[56,133],"global":[57],"memory,":[58],"cost":[60,121],"barrier":[62,123],"synchronization,":[63],"within":[66],"work-group":[68],"shared":[70,134],"local":[71,135],"one":[74],"addition":[76],"per":[77],"work-item":[78],"compute":[81],"unit":[82],"an":[84],"AMD":[85],"MI100":[86],"GPU.":[87],"Then,":[88],"describe":[90],"implementations":[92],"kernels":[96,168],"with":[97,149,170,184],"vectorized":[98,150],"accesses,":[100],"parameterized":[101],"workload":[102],"sizes,":[103],"vendor's":[105,172],"library":[106,173],"APIs.":[107,174],"Our":[108],"experimental":[109],"results":[110],"show":[111],"that":[112],"1)":[113],"there":[114],"performance":[117],"tradeoff":[118],"between":[119,207],"amount":[127],"parallelism":[129],"from":[130],"when":[137],"increase":[139],"size":[141,165],"2)":[145],"kernel":[148,201],"vector":[154],"data":[155,185],"types":[156],"approximately":[158],"3%":[159],"faster":[160],"for":[161],"large":[163],"problem":[164],"than":[166],"written":[169],"3)":[175],"compiler":[177],"needs":[178],"assist":[180],"hardware":[182],"processor":[183],"dependency":[186],"resolution":[187],"at":[188,222],"level":[190],"instruction":[192],"set":[193],"architecture.":[194],"4)":[195],"power":[197,216],"consumption":[198],"execution":[202],"fluctuates":[206],"277":[208],"Watts":[209,212],"301":[211],"dynamic":[215],"other":[218],"activities":[220],"most":[223],"31":[224],"Watts.":[225]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2}],"updated_date":"2026-06-17T08:01:34.144755","created_date":"2025-10-10T00:00:00"}
