{"id":"https://openalex.org/W1992851788","doi":"https://doi.org/10.1145/1345206.1345220","title":"Optimization principles and application performance evaluation of a multithreaded GPU using CUDA","display_name":"Optimization principles and application performance evaluation of a multithreaded GPU using CUDA","publication_year":2008,"publication_date":"2008-02-20","ids":{"openalex":"https://openalex.org/W1992851788","doi":"https://doi.org/10.1145/1345206.1345220","mag":"1992851788"},"language":"en","primary_location":{"id":"doi:10.1145/1345206.1345220","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1345206.1345220","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 13th ACM SIGPLAN Symposium on Principles and practice of parallel programming","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5014753963","display_name":"Shane Ryoo","orcid":null},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Shane Ryoo","raw_affiliation_strings":["University of Illinois at Urbana-Champaign, Urbana, IL, USA","University of Illinois at Urbana/Champaign, Urbana, IL, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana-Champaign, Urbana, IL, USA","institution_ids":["https://openalex.org/I157725225"]},{"raw_affiliation_string":"University of Illinois at Urbana/Champaign, Urbana, IL, USA#TAB#","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087702701","display_name":"Christopher Rodrigues","orcid":null},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christopher I. Rodrigues","raw_affiliation_strings":["University of Illinois at Urbana-Champaign, Urbana, IL, USA","University of Illinois at Urbana/Champaign, Urbana, IL, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana-Champaign, Urbana, IL, USA","institution_ids":["https://openalex.org/I157725225"]},{"raw_affiliation_string":"University of Illinois at Urbana/Champaign, Urbana, IL, USA#TAB#","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016397835","display_name":"Sara S. Baghsorkhi","orcid":null},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sara S. Baghsorkhi","raw_affiliation_strings":["University of Illinois at Urbana-Champaign, Urbana, IL, USA","University of Illinois at Urbana/Champaign, Urbana, IL, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana-Champaign, Urbana, IL, USA","institution_ids":["https://openalex.org/I157725225"]},{"raw_affiliation_string":"University of Illinois at Urbana/Champaign, Urbana, IL, USA#TAB#","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082444941","display_name":"Sam S. Stone","orcid":null},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sam S. Stone","raw_affiliation_strings":["University of Illinois at Urbana-Champaign, Urbana, IL, USA","University of Illinois at Urbana/Champaign, Urbana, IL, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana-Champaign, Urbana, IL, USA","institution_ids":["https://openalex.org/I157725225"]},{"raw_affiliation_string":"University of Illinois at Urbana/Champaign, Urbana, IL, USA#TAB#","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026418735","display_name":"David B. Kirk","orcid":"https://orcid.org/0000-0002-4887-5098"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]},{"id":"https://openalex.org/I1304085615","display_name":"Nvidia (United Kingdom)","ror":"https://ror.org/02kr42612","country_code":"GB","type":"company","lineage":["https://openalex.org/I1304085615","https://openalex.org/I4210127875"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"David B. Kirk","raw_affiliation_strings":["NVIDIA Corporation, Santa Clara, CA, USA","[Nvidia Corporation, Santa Clara, CA, USA]"],"affiliations":[{"raw_affiliation_string":"NVIDIA Corporation, Santa Clara, CA, USA","institution_ids":["https://openalex.org/I4210127875"]},{"raw_affiliation_string":"[Nvidia Corporation, Santa Clara, CA, USA]","institution_ids":["https://openalex.org/I1304085615"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040404999","display_name":"Wen\u2010mei Hwu","orcid":"https://orcid.org/0000-0003-2532-5349"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wen-mei W. Hwu","raw_affiliation_strings":["University of Illinois at Urbana-Champaign, Urbana, IL, USA","University of Illinois at Urbana/Champaign, Urbana, IL, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana-Champaign, Urbana, IL, USA","institution_ids":["https://openalex.org/I157725225"]},{"raw_affiliation_string":"University of Illinois at Urbana/Champaign, Urbana, IL, USA#TAB#","institution_ids":["https://openalex.org/I157725225"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5014753963"],"corresponding_institution_ids":["https://openalex.org/I157725225"],"apc_list":null,"apc_paid":null,"fwci":117.7678,"has_fulltext":false,"cited_by_count":904,"citation_normalized_percentile":{"value":0.99954673,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"73","last_page":"82"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9067798852920532},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.79588782787323},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7035289406776428},{"id":"https://openalex.org/keywords/multithreading","display_name":"Multithreading","score":0.6616684198379517},{"id":"https://openalex.org/keywords/coprocessor","display_name":"Coprocessor","score":0.644463300704956},{"id":"https://openalex.org/keywords/porting","display_name":"Porting","score":0.6279251575469971},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.5750182867050171},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.564626932144165},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.5418641567230225},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.5123430490493774},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.49312373995780945},{"id":"https://openalex.org/keywords/instruction-set","display_name":"Instruction set","score":0.4308527708053589},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.3823871314525604},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.24355053901672363}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9067798852920532},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.79588782787323},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7035289406776428},{"id":"https://openalex.org/C201410400","wikidata":"https://www.wikidata.org/wiki/Q1064412","display_name":"Multithreading","level":3,"score":0.6616684198379517},{"id":"https://openalex.org/C86111242","wikidata":"https://www.wikidata.org/wiki/Q859595","display_name":"Coprocessor","level":2,"score":0.644463300704956},{"id":"https://openalex.org/C106251023","wikidata":"https://www.wikidata.org/wiki/Q851989","display_name":"Porting","level":3,"score":0.6279251575469971},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.5750182867050171},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.564626932144165},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.5418641567230225},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.5123430490493774},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.49312373995780945},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.4308527708053589},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.3823871314525604},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.24355053901672363},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/1345206.1345220","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1345206.1345220","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 13th ACM SIGPLAN Symposium on Principles and practice of parallel programming","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.163.889","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.163.889","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://impact.crhc.illinois.edu/ftp/conference/ppopp-08-ryoo.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.306.9605","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.306.9605","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://grothoff.org/christian/teaching/2008/4704/p73-ryoo.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320309480","display_name":"Nvidia","ror":"https://ror.org/03jdj4y14"},{"id":"https://openalex.org/F4320332222","display_name":"University of Illinois at Urbana-Champaign","ror":"https://ror.org/047426m28"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1482334991","https://openalex.org/W1494930385","https://openalex.org/W1536051636","https://openalex.org/W1589918049","https://openalex.org/W1596411313","https://openalex.org/W1964313035","https://openalex.org/W1969289728","https://openalex.org/W1982825626","https://openalex.org/W2063160743","https://openalex.org/W2098220211","https://openalex.org/W2102582914","https://openalex.org/W2108157916","https://openalex.org/W2108792719","https://openalex.org/W2121758805","https://openalex.org/W2135736783","https://openalex.org/W2139774022","https://openalex.org/W2159497832","https://openalex.org/W2163229756","https://openalex.org/W2169706611","https://openalex.org/W2620440553","https://openalex.org/W2620746779","https://openalex.org/W4300457049","https://openalex.org/W6628694728","https://openalex.org/W6631783819","https://openalex.org/W6635487442","https://openalex.org/W6635692132","https://openalex.org/W6738527174"],"related_works":["https://openalex.org/W2066653375","https://openalex.org/W1499890669","https://openalex.org/W2432304421","https://openalex.org/W4301139505","https://openalex.org/W2119220882","https://openalex.org/W2536597837","https://openalex.org/W2103261828","https://openalex.org/W2132643331","https://openalex.org/W2130912976","https://openalex.org/W2080109495"],"abstract_inverted_index":{"GPUs":[0],"have":[1],"recently":[2],"attracted":[3],"the":[4,28,71,94,109,113,122,133,138,170,182],"attention":[5],"of":[6,17,35,50,61,97,111,124,135,140,147,184,194],"many":[7],"application":[8,215],"developers":[9,107],"as":[10],"commodity":[11],"data-parallel":[12],"coprocessors.":[13],"The":[14,128],"newest":[15],"generations":[16],"GPU":[18],"architecture":[19],"provide":[20],"easier":[21],"programmability":[22],"and":[23,32,55,78,99,121,137,151,176,196,198,209],"increased":[24,158],"generality":[25],"while":[26],"maintaining":[27],"tremendous":[29],"memory":[30,102,142,153,165,174],"bandwidth":[31],"computational":[33],"power":[34],"traditional":[36],"GPUs.":[37],"This":[38],"opportunity":[39],"should":[40],"redirect":[41],"efforts":[42],"in":[43,206],"GPGPU":[44],"research":[45],"from":[46],"ad":[47],"hoc":[48],"porting":[49],"applications":[51,195],"to":[52,63,83,92,130,163,166,169,180,203,212],"establishing":[53],"principles":[54],"strategies":[56,190],"that":[57],"allow":[58],"efficient":[59],"mapping":[60],"computation":[62],"graphics":[64],"hardware.":[65],"In":[66],"this":[67,86],"work":[68],"we":[69],"discuss":[70],"GeForce":[72],"8800":[73],"GTX":[74],"processor's":[75],"organization,":[76],"features,":[77],"generalized":[79],"optimization":[80],"strategies.":[81],"Key":[82],"performance":[84,159],"on":[85],"platform":[87],"is":[88],"using":[89],"massive":[90],"multithreading":[91],"utilize":[93],"large":[95],"number":[96,123,134,146,183],"cores":[98],"hide":[100],"global":[101,152],"latency.":[103],"To":[104],"achieve":[105,199],"this,":[106],"face":[108],"challenge":[110],"striking":[112],"right":[114],"balance":[115],"between":[116,200,210],"each":[117],"thread's":[118],"resource":[119],"usage":[120],"simultaneously":[125],"active":[126],"threads.":[127],"resources":[129],"manage":[131],"include":[132],"registers":[136],"amount":[139],"on-chip":[141],"used":[143],"per":[144,149],"thread,":[145],"threads":[148],"multiprocessor,":[150],"bandwidth.":[154],"We":[155,187],"also":[156],"obtain":[157],"by":[160],"reordering":[161],"accesses":[162],"off-chip":[164],"combine":[167],"requests":[168],"same":[171],"or":[172],"contiguous":[173],"locations":[175],"apply":[177,188],"classical":[178],"optimizations":[179],"reduce":[181],"executed":[185],"operations.":[186],"these":[189],"across":[191],"a":[192,201],"variety":[193],"domains":[197],"10.5X":[202],"457X":[204],"speedup":[205],"kernel":[207],"codes":[208],"1.16X":[211],"431X":[213],"total":[214],"speedup.":[216]},"counts_by_year":[{"year":2025,"cited_by_count":11},{"year":2024,"cited_by_count":13},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":10},{"year":2021,"cited_by_count":15},{"year":2020,"cited_by_count":20},{"year":2019,"cited_by_count":28},{"year":2018,"cited_by_count":24},{"year":2017,"cited_by_count":48},{"year":2016,"cited_by_count":49},{"year":2015,"cited_by_count":64},{"year":2014,"cited_by_count":62},{"year":2013,"cited_by_count":101},{"year":2012,"cited_by_count":113}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
