{"id":"https://openalex.org/W4214497478","doi":"https://doi.org/10.1201/b11417-14","title":"Scalable Manycore Computing with CUDA","display_name":"Scalable Manycore Computing with CUDA","publication_year":2011,"publication_date":"2011-12-12","ids":{"openalex":"https://openalex.org/W4214497478","doi":"https://doi.org/10.1201/b11417-14"},"language":"en","primary_location":{"id":"doi:10.1201/b11417-14","is_oa":false,"landing_page_url":"https://doi.org/10.1201/b11417-14","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Fundamentals of Multicore Software Development","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5024606205","display_name":"Michael Garland","orcid":"https://orcid.org/0000-0001-6093-7602"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Michael Garland","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077691647","display_name":"Vinod Grover","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vinod Grover","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5074818897","display_name":"Kevin Skadron","orcid":"https://orcid.org/0000-0002-8091-9302"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kevin Skadron","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5024606205"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.38601399,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"143","last_page":"166"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.807523250579834},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.769447922706604},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7205309271812439},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6832504272460938},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.5565078258514404},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.4206346869468689},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.18947288393974304}],"concepts":[{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.807523250579834},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.769447922706604},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7205309271812439},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6832504272460938},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.5565078258514404},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.4206346869468689},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.18947288393974304}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1201/b11417-14","is_oa":false,"landing_page_url":"https://doi.org/10.1201/b11417-14","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Fundamentals of Multicore Software Development","raw_type":"book-chapter"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.44999998807907104,"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":1,"referenced_works":["https://openalex.org/W2165365531"],"related_works":["https://openalex.org/W2011840458","https://openalex.org/W3213381848","https://openalex.org/W2017587301","https://openalex.org/W2005148983","https://openalex.org/W2030707850","https://openalex.org/W4386975487","https://openalex.org/W2096672917","https://openalex.org/W2009169896","https://openalex.org/W2392023973","https://openalex.org/W2939411666"],"abstract_inverted_index":{"The":[0,142],"applications":[1],"that":[2,85,268,279,289,397],"seemmost":[3],"likely":[4],"to":[5,69,100,103,176,197,208,228,343],"benefit":[6],"frommajor":[7],"advances":[8],"in":[9,110,226,339,416],"computational":[10,421],"power":[11,138],"and":[12,51,95,132,204,286,292,321,366,377,383,406,423,426,433],"drive":[13],"future":[14],"processor":[15,155,252],"development":[16],"appear":[17],"increasingly":[18],"throughput":[19,127,134,144],"oriented,":[20],"with":[21,221,236,253,348],"products":[22],"optimized":[23],"more":[24],"for":[25,114,243,325,355],"data":[26,49,352],"or":[27,139],"task":[28],"parallelism":[29,164,239,396],"depending":[30],"on":[31,117,165],"their":[32],"market":[33],"focus":[34],"(e.g.,":[35],"HPC":[36],"vs.":[37,39],"transactional":[38],"multimedia).":[40],"Examples":[41],"include":[42,264],"the":[43,66,72,105,159,179,209,213,229,281,305,409],"simulation":[44],"of":[45,65,74,161,181,212,231,283,294,297,360,375,387,395,400,420],"large":[46,358],"physical":[47],"systems,":[48],"mining,":[50],"ray":[52],"tracing.":[53],"Throughputoriented":[54],"workload":[55],"design":[56,180],"emphasizes":[57],"many":[58],"small":[59],"cores":[60,80,123],"because":[61],"they":[62,335],"eliminate":[63],"most":[64,153,174],"hardware":[67,193,267],"needed":[68,302],"speed":[70],"up":[71],"performance":[73,116,256],"an":[75],"individual":[76,119],"thread.":[77,120],"These":[78],"simple":[79,122],"are":[81,217,300,331,336,413,427],"then":[82],"multithreaded,":[83],"so":[84],"when":[86,303],"any":[87,118,258],"one":[88],"thread":[89],"stalls,":[90],"other":[91,259,404],"threads":[92,389],"can":[93,98,314],"run":[94],"every":[96],"core":[97],"continue":[99],"be":[101,315],"used":[102,415],"maximize":[104],"application\u2019s":[106],"overall":[107],"throughput.":[108],"Multithreading":[109],"turn":[111],"relaxes":[112],"requirements":[113],"high":[115,143],"Small,":[121],"therefore":[124],"provide":[125,378],"greater":[126,133,254],"per":[128,390],"unitof":[129],"chip":[130],"area":[131],"within":[135],"a":[136,249,308,340,349,357,363,393,417],"given":[137],"cooling":[140],"constraint.":[141],"provided":[145],"by":[146,152,429],"\u201cmanycore\u201d":[147],"organizations":[148],"has":[149],"been":[150],"recognized":[151],"major":[154,431],"vendors.":[156],"To":[157],"understand":[158],"implications":[160],"rapidly":[162],"increasing":[163,241],"both":[166],"hardwareand":[167],"software":[168,435],"design,":[169],"we":[170],"believe":[171],"it":[172],"is":[173,247,398],"productive":[175],"look":[177],"at":[178],"modern":[182],"GPUs":[183,190,216,262,330,371,412],"(Graphics":[184],"Processing":[185],"Units).":[186],"A":[187],"decade":[188],"ago,":[189],"were":[191],"fixed-function":[192,210],"devices":[194,211],"designed":[195],"specifically":[196],"accelerate":[198,280,290],"graphics":[199],"APIs":[200],"such":[201,276,317],"as":[202,277,307,318],"OpenGL":[203],"Direct3D.":[205],"In":[206],"contrast":[207],"past,":[214],"today\u2019s":[215],"fully":[218],"programmable":[219],"microprocessors":[220],"general-purpose":[222,244,250,270,309,332],"architectures.":[223],"Having":[224],"evolved":[225],"response":[227],"needs":[230],"computer":[232],"graphics-an":[233],"application":[234],"domain":[235],"tremendous":[237],"inherent":[238],"but":[240],"need":[242],"programmabilitythe":[245],"GPU":[246,306,322],"already":[248],"manycore":[251,310,333,410],"peak":[255],"than":[257,403],"commodity":[260],"processor.":[261],"simply":[263],"some":[265,313,326],"additional":[266],"typical,":[269],"CPUs":[271],"do":[272],"not,":[273],"mainly":[274],"units":[275,288,299],"rasterizers":[278],"rendering":[282],"3D":[284],"polygons":[285],"texture":[287,319],"filtering":[291],"blending":[293],"images.":[295],"Most":[296],"these":[298],"not":[301],"using":[304],"processor,":[311],"although":[312],"useful,":[316],"caches":[320],"instruction-set":[323],"support":[324],"transcendental":[327],"functions.":[328],"Because":[329],"processors,":[334],"typically":[337],"programmed":[338],"fashion":[341],"similar":[342],"traditional":[344],"parallel":[345,388],"programming":[346],"models,":[347],"single-program,":[350],"multiple":[351],"(SPMD)":[353],"model":[354],"launching":[356],"number":[359],"concurrent":[361],"threads,":[362],"unified":[364],"memory,":[365],"standard":[367],"synchronization":[368],"mechanisms.":[369],"High-end":[370],"cost":[372],"just":[373],"hundreds":[374],"dollars":[376],"teraflop":[379],"performancewhile":[380],"creating,":[381],"executing,":[382],"retiring":[384],"literally":[385],"billions":[386],"second,":[391],"exhibiting":[392],"scale":[394],"orders":[399],"magnitude":[401],"higher":[402],"platforms":[405],"truly":[407],"embodies":[408],"paradigm.":[411],"now":[414],"wide":[418],"range":[419],"science":[422],"engineering":[424],"applications,":[425],"supported":[428],"several":[430],"libraries":[432],"commercial":[434],"products.":[436]},"counts_by_year":[{"year":2017,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2022-03-02T00:00:00"}
