{"id":"https://openalex.org/W3207980721","doi":"https://doi.org/10.1145/3482880","title":"High-Performance and Energy-Efficient 3D Manycore GPU Architecture for Accelerating Graph Analytics","display_name":"High-Performance and Energy-Efficient 3D Manycore GPU Architecture for Accelerating Graph Analytics","publication_year":2021,"publication_date":"2021-10-22","ids":{"openalex":"https://openalex.org/W3207980721","doi":"https://doi.org/10.1145/3482880","mag":"3207980721"},"language":"en","primary_location":{"id":"doi:10.1145/3482880","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3482880","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3482880","source":{"id":"https://openalex.org/S96198239","display_name":"ACM Journal on Emerging Technologies in Computing Systems","issn_l":"1550-4832","issn":["1550-4832","1550-4840"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Journal on Emerging Technologies in Computing Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3482880","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060069938","display_name":"Dwaipayan Choudhury","orcid":"https://orcid.org/0000-0001-6852-6074"},"institutions":[{"id":"https://openalex.org/I72951846","display_name":"Washington State University","ror":"https://ror.org/05dk0ce17","country_code":"US","type":"education","lineage":["https://openalex.org/I72951846"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dwaipayan Choudhury","raw_affiliation_strings":["Washington State University, Pullman, WA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Washington State University, Pullman, WA","institution_ids":["https://openalex.org/I72951846"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015460842","display_name":"Aravind Sukumaran-Rajam","orcid":"https://orcid.org/0000-0002-4062-0293"},"institutions":[{"id":"https://openalex.org/I72951846","display_name":"Washington State University","ror":"https://ror.org/05dk0ce17","country_code":"US","type":"education","lineage":["https://openalex.org/I72951846"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aravind Sukumaran Rajam","raw_affiliation_strings":["Washington State University, Pullman, WA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Washington State University, Pullman, WA","institution_ids":["https://openalex.org/I72951846"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048410309","display_name":"Ananth Kalyanaraman","orcid":"https://orcid.org/0000-0001-6721-233X"},"institutions":[{"id":"https://openalex.org/I72951846","display_name":"Washington State University","ror":"https://ror.org/05dk0ce17","country_code":"US","type":"education","lineage":["https://openalex.org/I72951846"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ananth Kalyanaraman","raw_affiliation_strings":["Washington State University, Pullman, WA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Washington State University, Pullman, WA","institution_ids":["https://openalex.org/I72951846"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5078441163","display_name":"Partha Pratim Pande","orcid":"https://orcid.org/0000-0002-5930-8531"},"institutions":[{"id":"https://openalex.org/I72951846","display_name":"Washington State University","ror":"https://ror.org/05dk0ce17","country_code":"US","type":"education","lineage":["https://openalex.org/I72951846"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Partha Pratim Pande","raw_affiliation_strings":["Washington State University, Pullman, WA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Washington State University, Pullman, WA","institution_ids":["https://openalex.org/I72951846"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.3207,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.62386907,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":"18","issue":"1","first_page":"1","last_page":"19"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8141645193099976},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.4851022958755493},{"id":"https://openalex.org/keywords/dram","display_name":"Dram","score":0.4789629280567169},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.47093597054481506},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.46955329179763794},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.46667802333831787},{"id":"https://openalex.org/keywords/memory-architecture","display_name":"Memory architecture","score":0.4625828266143799},{"id":"https://openalex.org/keywords/energy-consumption","display_name":"Energy consumption","score":0.43478071689605713},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.43112289905548096},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.4251984655857086},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.34732696413993835},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.33770206570625305},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.17605000734329224},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.12728086113929749}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8141645193099976},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4851022958755493},{"id":"https://openalex.org/C7366592","wikidata":"https://www.wikidata.org/wiki/Q1255620","display_name":"Dram","level":2,"score":0.4789629280567169},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.47093597054481506},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.46955329179763794},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.46667802333831787},{"id":"https://openalex.org/C2779602883","wikidata":"https://www.wikidata.org/wiki/Q15544750","display_name":"Memory architecture","level":2,"score":0.4625828266143799},{"id":"https://openalex.org/C2780165032","wikidata":"https://www.wikidata.org/wiki/Q16869822","display_name":"Energy consumption","level":2,"score":0.43478071689605713},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.43112289905548096},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.4251984655857086},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.34732696413993835},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.33770206570625305},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.17605000734329224},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.12728086113929749},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3482880","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3482880","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3482880","source":{"id":"https://openalex.org/S96198239","display_name":"ACM Journal on Emerging Technologies in Computing Systems","issn_l":"1550-4832","issn":["1550-4832","1550-4840"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Journal on Emerging Technologies in Computing Systems","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3482880","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3482880","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3482880","source":{"id":"https://openalex.org/S96198239","display_name":"ACM Journal on Emerging Technologies in Computing Systems","issn_l":"1550-4832","issn":["1550-4832","1550-4840"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Journal on Emerging Technologies in Computing Systems","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.8999999761581421,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[{"id":"https://openalex.org/G2466898058","display_name":null,"funder_award_id":"1815467","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4788921062","display_name":null,"funder_award_id":"CCF-1815467","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3207980721.pdf","grobid_xml":"https://content.openalex.org/works/W3207980721.grobid-xml"},"referenced_works_count":38,"referenced_works":["https://openalex.org/W1979527452","https://openalex.org/W2008620264","https://openalex.org/W2018934112","https://openalex.org/W2021211271","https://openalex.org/W2048466306","https://openalex.org/W2104492856","https://openalex.org/W2112090702","https://openalex.org/W2118961575","https://openalex.org/W2123538390","https://openalex.org/W2325912822","https://openalex.org/W2512825584","https://openalex.org/W2729477418","https://openalex.org/W2738366405","https://openalex.org/W2756437767","https://openalex.org/W2784007581","https://openalex.org/W2792497586","https://openalex.org/W2795118915","https://openalex.org/W2888269620","https://openalex.org/W2897519942","https://openalex.org/W2919497868","https://openalex.org/W2951135776","https://openalex.org/W2963565222","https://openalex.org/W3004383742","https://openalex.org/W3043715351","https://openalex.org/W3099610822","https://openalex.org/W3103967557","https://openalex.org/W3105208701","https://openalex.org/W3109214369","https://openalex.org/W3161671595","https://openalex.org/W4230315356","https://openalex.org/W4232902075","https://openalex.org/W4234833047","https://openalex.org/W4238970236","https://openalex.org/W4239474411","https://openalex.org/W4240607294","https://openalex.org/W4240862739","https://openalex.org/W4249375951","https://openalex.org/W4292907573"],"related_works":["https://openalex.org/W2800626838","https://openalex.org/W4205208341","https://openalex.org/W1540125283","https://openalex.org/W2001585562","https://openalex.org/W2077105843","https://openalex.org/W4205541923","https://openalex.org/W2154560316","https://openalex.org/W2112804590","https://openalex.org/W2140286994","https://openalex.org/W3175523456"],"abstract_inverted_index":{"Recent":[0],"advances":[1],"in":[2,159,178,228,275],"GPU-based":[3],"manycore":[4,124,150],"accelerators":[5],"provide":[6,46],"the":[7,34,54,63,79,95,99,128,131,134,139,154,175,185,203,208,229,239,247,265],"opportunity":[8],"to":[9,49,60,97,195,201,223,309],"efficiently":[10],"process":[11],"large-scale":[12],"graphs":[13,19],"on":[14,306],"chip.":[15],"However,":[16],"real":[17],"world":[18],"have":[20],"a":[21,40,47,119,144,168,179,291,310],"diverse":[22],"range":[23],"of":[24,36,81,101,118,130,231,294],"topology":[25,56],"and":[26,83,92,138,162,225,256,271,301],"connectivity":[27],"patterns":[28,66,86],"(e.g.,":[29],"degree":[30],"distributions)":[31],"that":[32,67,261,283],"make":[33],"design":[35,100,117,181,314],"input-agnostic":[37],"hardware":[38],"architectures":[39,45],"challenge.":[41],"Network-on-Chip":[42],"(NoC)-":[43],"based":[44],"way":[48],"overcome":[50],"this":[51,74],"challenge":[52],"as":[53,238],"architectural":[55],"can":[57],"be":[58],"used":[59],"approximately":[61],"model":[62],"expected":[64],"traffic":[65,85],"emerge":[68],"from":[69],"graph":[70,90,212],"application":[71],"workloads.":[72],"In":[73,106,234],"paper,":[75],"we":[76,115,173],"first":[77],"study":[78],"mix":[80],"long-":[82],"short-range":[84],"generated":[87],"on-chip":[88],"using":[89],"workloads,":[91],"subsequently":[93],"use":[94],"findings":[96],"adapt":[98],"an":[102],"optimal":[103],"NoC-based":[104],"architecture.":[105,210],"particular,":[107],"by":[108,166,207],"leveraging":[109],"emerging":[110],"three-dimensional":[111],"(3D)":[112],"integration":[113,190],"technology,":[114],"propose":[116],"small-world":[120],"NoC":[121],"(SWNoC)-":[122],"enabled":[123],"GPU":[125,151,236,295],"architecture,":[126],"where":[127],"placement":[129],"links":[132],"connecting":[133],"streaming":[135],"multiprocessors":[136],"(SM)":[137],"memory":[140,216,241,273,286],"controllers":[141],"(MC)":[142],"follow":[143],"power-law":[145],"distribution.":[146],"The":[147,188,278],"proposed":[148,279],"3D":[149,180,189,285],"architecture":[152],"outperforms":[153],"traditional":[155],"planar":[156,312],"(2D)":[157],"counterparts":[158],"both":[160],"performance":[161,204,255,299],"energy":[163,226,257,304],"consumption.":[164,258],"Moreover,":[165],"adopting":[167],"joint":[169],"performance-thermal":[170],"optimization":[171],"strategy,":[172],"address":[174],"thermal":[176],"concerns":[177],"without":[182],"noticeably":[183],"compromising":[184],"achievable":[186],"performance.":[187],"technology":[191],"is":[192,243],"also":[193],"leveraged":[194],"incorporate":[196],"Near":[197],"Data":[198],"Processing":[199],"(NDP)":[200],"complement":[202],"benefits":[205],"introduced":[206],"SWNoC":[209],"As":[211],"applications":[213],"are":[214],"inherently":[215],"intensive,":[217],"off-chip":[218,249],"data":[219,250],"movement":[220,251],"gives":[221],"rise":[222],"latency":[224],"overheads":[227,266],"presence":[230],"external":[232,316],"DRAM.":[233,317],"conventional":[235,311],"architectures,":[237],"main":[240],"layer":[242],"not":[244],"integrated":[245],"with":[246,268,290,315],"logic,":[248],"negatively":[252],"impacts":[253],"overall":[254],"We":[259],"demonstrate":[260],"NDP":[262,281],"significantly":[263],"reduces":[264],"associated":[267],"such":[269],"frequent":[270],"irregular":[272],"accesses":[274],"graph-based":[276],"applications.":[277],"SWNoC-enabled":[280],"framework":[282],"integrates":[284],"(like":[287],"Micron's":[288],"HMC)":[289],"massive":[292],"number":[293],"cores":[296],"achieves":[297],"29.5%":[298],"improvement":[300],"30.03%":[302],"less":[303],"consumption":[305],"average":[307],"compared":[308],"Mesh-based":[313]},"counts_by_year":[{"year":2022,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
