{"id":"https://openalex.org/W2951091066","doi":"https://doi.org/10.1145/3307650.3322212","title":"Opportunistic computing in GPU architectures","display_name":"Opportunistic computing in GPU architectures","publication_year":2019,"publication_date":"2019-06-14","ids":{"openalex":"https://openalex.org/W2951091066","doi":"https://doi.org/10.1145/3307650.3322212","mag":"2951091066"},"language":"en","primary_location":{"id":"doi:10.1145/3307650.3322212","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3307650.3322212","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3307650.3322212","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 46th International Symposium on Computer Architecture","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3307650.3322212","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112474431","display_name":"Ashutosh Pattnaik","orcid":"https://orcid.org/0000-0003-0367-5989"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Ashutosh Pattnaik","raw_affiliation_strings":["The Pennsylvania State University"],"affiliations":[{"raw_affiliation_string":"The Pennsylvania State University","institution_ids":["https://openalex.org/I130769515"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087859795","display_name":"Xulong Tang","orcid":"https://orcid.org/0000-0002-3385-2053"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xulong Tang","raw_affiliation_strings":["The Pennsylvania State University"],"affiliations":[{"raw_affiliation_string":"The Pennsylvania State University","institution_ids":["https://openalex.org/I130769515"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064126781","display_name":"Onur Kay\u0131ran","orcid":"https://orcid.org/0009-0006-4482-3115"},"institutions":[{"id":"https://openalex.org/I1311921367","display_name":"Advanced Micro Devices (Canada)","ror":"https://ror.org/02yh0k313","country_code":"CA","type":"company","lineage":["https://openalex.org/I1311921367","https://openalex.org/I4210137977"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Onur Kayiran","raw_affiliation_strings":["Advanced Micro Devices, Inc"],"affiliations":[{"raw_affiliation_string":"Advanced Micro Devices, Inc","institution_ids":["https://openalex.org/I1311921367"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050350124","display_name":"Adwait Jog","orcid":"https://orcid.org/0000-0002-5525-7204"},"institutions":[{"id":"https://openalex.org/I16285277","display_name":"William & Mary","ror":"https://ror.org/03hsf0573","country_code":"US","type":"education","lineage":["https://openalex.org/I16285277"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Adwait Jog","raw_affiliation_strings":["College of William &amp; Mary"],"affiliations":[{"raw_affiliation_string":"College of William &amp; Mary","institution_ids":["https://openalex.org/I16285277"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101912070","display_name":"Asit Mishra","orcid":"https://orcid.org/0000-0001-6489-6895"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Asit Mishra","raw_affiliation_strings":["NVIDIA Corp"],"affiliations":[{"raw_affiliation_string":"NVIDIA Corp","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007116603","display_name":"Mahmut Kandemir","orcid":"https://orcid.org/0000-0002-9940-9951"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mahmut T. Kandemir","raw_affiliation_strings":["The Pennsylvania State University"],"affiliations":[{"raw_affiliation_string":"The Pennsylvania State University","institution_ids":["https://openalex.org/I130769515"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033253809","display_name":"Anand Sivasubramaniam","orcid":"https://orcid.org/0000-0001-6173-687X"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anand Sivasubramaniam","raw_affiliation_strings":["The Pennsylvania State University"],"affiliations":[{"raw_affiliation_string":"The Pennsylvania State University","institution_ids":["https://openalex.org/I130769515"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5054027488","display_name":"Chita R. Das","orcid":"https://orcid.org/0000-0002-4746-7578"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chita R. Das","raw_affiliation_strings":["The Pennsylvania State University"],"affiliations":[{"raw_affiliation_string":"The Pennsylvania State University","institution_ids":["https://openalex.org/I130769515"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5112474431"],"corresponding_institution_ids":["https://openalex.org/I130769515"],"apc_list":null,"apc_paid":null,"fwci":6.1715,"has_fulltext":true,"cited_by_count":40,"citation_normalized_percentile":{"value":0.97088976,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"210","last_page":"223"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8896855115890503},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.7152373790740967},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6771678328514099},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.6204268932342529},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.5680124163627625},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.4873538911342621},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.46626028418540955},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.45591461658477783},{"id":"https://openalex.org/keywords/von-neumann-architecture","display_name":"Von Neumann architecture","score":0.44295722246170044},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.4315575361251831},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3437327444553375},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.1472516655921936},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.14112433791160583},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.11801880598068237}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8896855115890503},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.7152373790740967},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6771678328514099},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.6204268932342529},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.5680124163627625},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.4873538911342621},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.46626028418540955},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.45591461658477783},{"id":"https://openalex.org/C80469333","wikidata":"https://www.wikidata.org/wiki/Q189088","display_name":"Von Neumann architecture","level":2,"score":0.44295722246170044},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.4315575361251831},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3437327444553375},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.1472516655921936},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.14112433791160583},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.11801880598068237},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3307650.3322212","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3307650.3322212","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3307650.3322212","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 46th International Symposium on Computer Architecture","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3307650.3322212","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3307650.3322212","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3307650.3322212","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 46th International Symposium on Computer Architecture","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.4300000071525574,"id":"https://metadata.un.org/sdg/7"}],"awards":[{"id":"https://openalex.org/G2651608833","display_name":null,"funder_award_id":"1750667","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3023408722","display_name":null,"funder_award_id":"1763681","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3727847720","display_name":"CRII: SHF: Design and Analysis of Processing-Near-Memory Enabled GPU Architecture","funder_award_id":"1657336","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4257531402","display_name":null,"funder_award_id":"1629915","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6193076742","display_name":null,"funder_award_id":"1317560","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7045959657","display_name":null,"funder_award_id":"1629129","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7489876353","display_name":null,"funder_award_id":"1763681,1629915,1629129,1439021,1317560","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320307757","display_name":"Advanced Micro Devices","ror":"https://ror.org/04kd6c783"},{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2951091066.pdf","grobid_xml":"https://content.openalex.org/works/W2951091066.grobid-xml"},"referenced_works_count":71,"referenced_works":["https://openalex.org/W1555673550","https://openalex.org/W1749461670","https://openalex.org/W1815597787","https://openalex.org/W1930795142","https://openalex.org/W1973573211","https://openalex.org/W1975237352","https://openalex.org/W1979527452","https://openalex.org/W1985818188","https://openalex.org/W2024122052","https://openalex.org/W2028173051","https://openalex.org/W2045261210","https://openalex.org/W2051260038","https://openalex.org/W2058490651","https://openalex.org/W2061537881","https://openalex.org/W2066985990","https://openalex.org/W2079248286","https://openalex.org/W2080592089","https://openalex.org/W2086112773","https://openalex.org/W2088253639","https://openalex.org/W2093043622","https://openalex.org/W2094332102","https://openalex.org/W2097215759","https://openalex.org/W2098505406","https://openalex.org/W2112547256","https://openalex.org/W2112980698","https://openalex.org/W2116784058","https://openalex.org/W2123184444","https://openalex.org/W2128120785","https://openalex.org/W2128329055","https://openalex.org/W2130820665","https://openalex.org/W2131413854","https://openalex.org/W2132581831","https://openalex.org/W2149234156","https://openalex.org/W2155568054","https://openalex.org/W2156831150","https://openalex.org/W2163083614","https://openalex.org/W2169880332","https://openalex.org/W2244841219","https://openalex.org/W2321888714","https://openalex.org/W2399715892","https://openalex.org/W2412101011","https://openalex.org/W2412412354","https://openalex.org/W2416518802","https://openalex.org/W2464177207","https://openalex.org/W2509861650","https://openalex.org/W2513721464","https://openalex.org/W2513900365","https://openalex.org/W2517869808","https://openalex.org/W2529404329","https://openalex.org/W2529649238","https://openalex.org/W2563305490","https://openalex.org/W2593150153","https://openalex.org/W2611998574","https://openalex.org/W2612654866","https://openalex.org/W2619002702","https://openalex.org/W2735065300","https://openalex.org/W2735130281","https://openalex.org/W2751459347","https://openalex.org/W2766369723","https://openalex.org/W2766789999","https://openalex.org/W2775752784","https://openalex.org/W2776052384","https://openalex.org/W2789554134","https://openalex.org/W2905560038","https://openalex.org/W2905658624","https://openalex.org/W2970239788","https://openalex.org/W2982559305","https://openalex.org/W3139689176","https://openalex.org/W4232022457","https://openalex.org/W4233147525","https://openalex.org/W4234107648"],"related_works":["https://openalex.org/W1963859303","https://openalex.org/W2364044215","https://openalex.org/W2389600408","https://openalex.org/W240129890","https://openalex.org/W3048701459","https://openalex.org/W2149078538","https://openalex.org/W2370314112","https://openalex.org/W1912958759","https://openalex.org/W2792081825","https://openalex.org/W2893308117"],"abstract_inverted_index":{"Data":[0,51],"transfer":[1,92],"overhead":[2,39],"between":[3,93],"computing":[4,95],"cores":[5,96,171],"and":[6,18,97,141,203,225],"memory":[7],"hierarchy":[8],"has":[9,21,56,69],"been":[10,57],"a":[11,78,120],"persistent":[12],"issue":[13],"for":[14,62,82,153],"von":[15],"Neumann":[16],"architectures":[17,84],"the":[19,27,43,66,86,94,126,148,155,189,212],"problem":[20],"only":[22],"become":[23],"more":[24],"challenging":[25],"with":[26,85,150],"emergence":[28],"of":[29,88],"manycore":[30],"systems.":[31],"A":[32],"conceptually":[33],"powerful":[34],"approach":[35],"to":[36,41,46,119,124,158,168,172,211,222],"mitigate":[37],"this":[38,74],"is":[40],"bring":[42],"computation":[44,156],"closer":[45,123],"data,":[47],"known":[48],"as":[49],"Near":[50],"Computing":[52],"(NDC).":[53],"Recently,":[54],"NDC":[55,80],"investigated":[58],"in":[59,112,206],"different":[60],"flavors":[61],"CPU-based":[63],"multicores,":[64],"while":[65],"GPU":[67,83,113,170,178,214],"domain":[68],"received":[70],"little":[71],"attention.":[72],"In":[73],"paper,":[75],"we":[76,104],"present":[77],"novel":[79],"solution":[81],"objective":[87],"minimizing":[89],"on-chip":[90,207],"data":[91,127,132,208],"Last-Level":[98],"Cache":[99],"(LLC).":[100],"To":[101],"achieve":[102],"this,":[103],"first":[105,144],"identify":[106],"frequently":[107],"occurring":[108],"Load-Compute-Store":[109],"instruction":[110],"chains":[111],"applications.":[114],"These":[115],"chains,":[116],"when":[117],"offloaded":[118,157,175],"compute":[121,173],"unit":[122],"where":[125],"resides,":[128],"can":[129],"significantly":[130],"reduce":[131],"movement.":[133],"We":[134],"develop":[135],"two":[136],"offloading":[137],"techniques,":[138],"called":[139],"LLC-Compute":[140,190],"Omni-Compute.":[142],"The":[143,160,216],"technique,":[145],"LLC-Compute,":[146],"augments":[147],"LLCs":[149],"computational":[151],"hardware":[152,167],"handling":[154],"them.":[159],"second":[161],"technique":[162,191],"(Omni-Compute)":[163],"employs":[164],"simple":[165],"bookkeeping":[166],"enable":[169],"instructions":[174],"by":[176],"other":[177],"cores.":[179],"Our":[180],"experimental":[181],"evaluations":[182],"on":[183,193],"nine":[184],"GPGPU":[185],"workloads":[186],"indicate":[187],"that":[188],"provides,":[192],"an":[194],"average,":[195],"19%":[196],"performance":[197],"improvement":[198],"(IPC),":[199],"11%":[200],"performance/watt":[201],"improvement,":[202],"29%":[204],"reduction":[205],"movement":[209],"compared":[210],"baseline":[213],"design.":[215],"Omni-Compute":[217],"design":[218],"boosts":[219],"these":[220],"benefits":[221],"31%,":[223],"16%":[224],"44%,":[226],"respectively.":[227]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":9},{"year":2019,"cited_by_count":7}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
