{"id":"https://openalex.org/W2124556751","doi":"https://doi.org/10.1145/1772954.1772971","title":"Efficient compilation of fine-grained SPMD-threaded programs for multicore CPUs","display_name":"Efficient compilation of fine-grained SPMD-threaded programs for multicore CPUs","publication_year":2010,"publication_date":"2010-04-24","ids":{"openalex":"https://openalex.org/W2124556751","doi":"https://doi.org/10.1145/1772954.1772971","mag":"2124556751"},"language":"en","primary_location":{"id":"doi:10.1145/1772954.1772971","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1772954.1772971","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 8th annual IEEE/ACM international symposium on Code generation and optimization","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034958091","display_name":"John A. Stratton","orcid":"https://orcid.org/0000-0002-3625-9781"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]},{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"John A. Stratton","raw_affiliation_strings":["NVIDIA Corporation / University of Illinois at Urbana-Champaign, Champaign, IL, USA","NVIDIA Corporation / University of Illinois at Urbana-Champaign, Champaign, IL, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"NVIDIA Corporation / University of Illinois at Urbana-Champaign, Champaign, IL, USA","institution_ids":["https://openalex.org/I4210127875","https://openalex.org/I157725225"]},{"raw_affiliation_string":"NVIDIA Corporation / University of Illinois at Urbana-Champaign, Champaign, IL, USA#TAB#","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077691647","display_name":"Vinod Grover","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]},{"id":"https://openalex.org/I1304085615","display_name":"Nvidia (United Kingdom)","ror":"https://ror.org/02kr42612","country_code":"GB","type":"company","lineage":["https://openalex.org/I1304085615","https://openalex.org/I4210127875"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Vinod Grover","raw_affiliation_strings":["NVIDIA Corporation, Santa Clara, CA, USA","[Nvidia Corporation, Santa Clara, CA, USA]"],"affiliations":[{"raw_affiliation_string":"NVIDIA Corporation, Santa Clara, CA, USA","institution_ids":["https://openalex.org/I4210127875"]},{"raw_affiliation_string":"[Nvidia Corporation, Santa Clara, CA, USA]","institution_ids":["https://openalex.org/I1304085615"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050458084","display_name":"Jaydeep Marathe","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]},{"id":"https://openalex.org/I1304085615","display_name":"Nvidia (United Kingdom)","ror":"https://ror.org/02kr42612","country_code":"GB","type":"company","lineage":["https://openalex.org/I1304085615","https://openalex.org/I4210127875"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Jaydeep Marathe","raw_affiliation_strings":["NVIDIA Corporation, Santa Clara, CA, USA","[Nvidia Corporation, Santa Clara, CA, USA]"],"affiliations":[{"raw_affiliation_string":"NVIDIA Corporation, Santa Clara, CA, USA","institution_ids":["https://openalex.org/I4210127875"]},{"raw_affiliation_string":"[Nvidia Corporation, Santa Clara, CA, USA]","institution_ids":["https://openalex.org/I1304085615"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110261985","display_name":"Bastiaan Aarts","orcid":null},"institutions":[{"id":"https://openalex.org/I1304085615","display_name":"Nvidia (United Kingdom)","ror":"https://ror.org/02kr42612","country_code":"GB","type":"company","lineage":["https://openalex.org/I1304085615","https://openalex.org/I4210127875"]},{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Bastiaan Aarts","raw_affiliation_strings":["NVIDIA Corporation, Santa Clara, CA, USA","[Nvidia Corporation, Santa Clara, CA, USA]"],"affiliations":[{"raw_affiliation_string":"NVIDIA Corporation, Santa Clara, CA, USA","institution_ids":["https://openalex.org/I4210127875"]},{"raw_affiliation_string":"[Nvidia Corporation, Santa Clara, CA, USA]","institution_ids":["https://openalex.org/I1304085615"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090050407","display_name":"Mike Murphy","orcid":"https://orcid.org/0000-0002-8343-0684"},"institutions":[{"id":"https://openalex.org/I1304085615","display_name":"Nvidia (United Kingdom)","ror":"https://ror.org/02kr42612","country_code":"GB","type":"company","lineage":["https://openalex.org/I1304085615","https://openalex.org/I4210127875"]},{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Mike Murphy","raw_affiliation_strings":["NVIDIA Corporation, Santa Clara, CA, USA","[Nvidia Corporation, Santa Clara, CA, USA]"],"affiliations":[{"raw_affiliation_string":"NVIDIA Corporation, Santa Clara, CA, USA","institution_ids":["https://openalex.org/I4210127875"]},{"raw_affiliation_string":"[Nvidia Corporation, Santa Clara, CA, USA]","institution_ids":["https://openalex.org/I1304085615"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112211978","display_name":"Ziang Hu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]},{"id":"https://openalex.org/I1304085615","display_name":"Nvidia (United Kingdom)","ror":"https://ror.org/02kr42612","country_code":"GB","type":"company","lineage":["https://openalex.org/I1304085615","https://openalex.org/I4210127875"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Ziang Hu","raw_affiliation_strings":["NVIDIA Corporation, Santa Clara, CA, USA","[Nvidia Corporation, Santa Clara, CA, USA]"],"affiliations":[{"raw_affiliation_string":"NVIDIA Corporation, Santa Clara, CA, USA","institution_ids":["https://openalex.org/I4210127875"]},{"raw_affiliation_string":"[Nvidia Corporation, Santa Clara, CA, USA]","institution_ids":["https://openalex.org/I1304085615"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040404999","display_name":"Wen\u2010mei Hwu","orcid":"https://orcid.org/0000-0003-2532-5349"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wen-mei W. Hwu","raw_affiliation_strings":["University of Illinois at Urbana-Champaign, Urbana, IL, USA","University of Illinois at Urbana/Champaign, Urbana, IL, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana-Champaign, Urbana, IL, USA","institution_ids":["https://openalex.org/I157725225"]},{"raw_affiliation_string":"University of Illinois at Urbana/Champaign, Urbana, IL, USA#TAB#","institution_ids":["https://openalex.org/I157725225"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5034958091"],"corresponding_institution_ids":["https://openalex.org/I157725225","https://openalex.org/I4210127875"],"apc_list":null,"apc_paid":null,"fwci":10.0993,"has_fulltext":false,"cited_by_count":83,"citation_normalized_percentile":{"value":0.98493864,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"111","last_page":"119"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8748151659965515},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.8195697069168091},{"id":"https://openalex.org/keywords/multithreading","display_name":"Multithreading","score":0.7559349536895752},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.7337114810943604},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.6684202551841736},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.6606616973876953},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6108890175819397},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.5997880697250366},{"id":"https://openalex.org/keywords/spmd","display_name":"SPMD","score":0.5361834168434143},{"id":"https://openalex.org/keywords/task-parallelism","display_name":"Task parallelism","score":0.4402029812335968},{"id":"https://openalex.org/keywords/programming-paradigm","display_name":"Programming paradigm","score":0.43470829725265503},{"id":"https://openalex.org/keywords/instruction-set","display_name":"Instruction set","score":0.4228513836860657},{"id":"https://openalex.org/keywords/posix-threads","display_name":"POSIX Threads","score":0.4186423718929291},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3786592185497284},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.3330654203891754},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.3318118453025818},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.1909777820110321}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8748151659965515},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.8195697069168091},{"id":"https://openalex.org/C201410400","wikidata":"https://www.wikidata.org/wiki/Q1064412","display_name":"Multithreading","level":3,"score":0.7559349536895752},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.7337114810943604},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.6684202551841736},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.6606616973876953},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6108890175819397},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.5997880697250366},{"id":"https://openalex.org/C7042729","wikidata":"https://www.wikidata.org/wiki/Q2289219","display_name":"SPMD","level":2,"score":0.5361834168434143},{"id":"https://openalex.org/C42992933","wikidata":"https://www.wikidata.org/wiki/Q691169","display_name":"Task parallelism","level":3,"score":0.4402029812335968},{"id":"https://openalex.org/C34165917","wikidata":"https://www.wikidata.org/wiki/Q188267","display_name":"Programming paradigm","level":2,"score":0.43470829725265503},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.4228513836860657},{"id":"https://openalex.org/C41138395","wikidata":"https://www.wikidata.org/wiki/Q928112","display_name":"POSIX Threads","level":3,"score":0.4186423718929291},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3786592185497284},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.3330654203891754},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.3318118453025818},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.1909777820110321}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1772954.1772971","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1772954.1772971","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 8th annual IEEE/ACM international symposium on Code generation and optimization","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W1502558230","https://openalex.org/W1992851788","https://openalex.org/W2002427499","https://openalex.org/W2020634604","https://openalex.org/W2028499920","https://openalex.org/W2032391783","https://openalex.org/W2042895571","https://openalex.org/W2045271686","https://openalex.org/W2059654835","https://openalex.org/W2136763012","https://openalex.org/W2140313023","https://openalex.org/W2146130982","https://openalex.org/W2151406013","https://openalex.org/W2159350554","https://openalex.org/W2159497832","https://openalex.org/W2166621589","https://openalex.org/W2170634604","https://openalex.org/W2295450001","https://openalex.org/W2582254694","https://openalex.org/W3007497337","https://openalex.org/W3138798301","https://openalex.org/W3150372793"],"related_works":["https://openalex.org/W2108723143","https://openalex.org/W4294310504","https://openalex.org/W2338363223","https://openalex.org/W2158978940","https://openalex.org/W2573076482","https://openalex.org/W4283366496","https://openalex.org/W2082701182","https://openalex.org/W2770398737","https://openalex.org/W2149156503","https://openalex.org/W2003933101"],"abstract_inverted_index":{"In":[0],"this":[1],"paper":[2],"we":[3],"describe":[4,41],"techniques":[5,71],"for":[6,26,37,43,78,103],"compiling":[7],"fine-grained":[8,45],"SPMD-threaded":[9],"programs,":[10],"expressed":[11],"in":[12,47,72],"programming":[13,81],"models":[14],"such":[15],"as":[16],"OpenCL":[17],"or":[18],"CUDA,":[19],"to":[20],"multicore":[21,38],"execution":[22],"platforms.":[23,39],"Programs":[24],"developed":[25],"manycore":[27],"processors":[28,122],"typically":[29],"express":[30],"finer":[31],"thread-level":[32],"parallelism":[33],"than":[34],"is":[35],"appropriate":[36],"We":[40,68],"options":[42],"implementing":[44],"threading":[46],"software,":[48],"and":[49,61,76],"find":[50],"that":[51],"reasonable":[52],"restrictions":[53],"on":[54,120],"the":[55,79,96,101,126],"synchronization":[56],"model":[57,82],"enable":[58],"significant":[59],"optimizations":[60],"performance":[62,93],"improvements":[63],"over":[64],"a":[65,73],"baseline":[66],"approach.":[67],"evaluate":[69],"these":[70],"production-level":[74],"compiler":[75],"runtime":[77],"CUDA":[80],"targeting":[83],"modern":[84],"CPUs.":[85],"Applications":[86],"tested":[87],"with":[88,95],"our":[89],"tool":[90],"often":[91],"showed":[92],"parity":[94],"compiled":[97],"C":[98],"version":[99],"of":[100,111,117],"application":[102],"single-thread":[104],"performance.":[105],"With":[106],"modest":[107],"coarse-grained":[108],"multithreading":[109],"typical":[110],"today's":[112],"CPU":[113],"architectures,":[114],"an":[115],"average":[116],"3.4x":[118],"speedup":[119],"4":[121],"was":[123],"observed":[124],"across":[125],"test":[127],"applications.":[128]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":7},{"year":2015,"cited_by_count":9},{"year":2014,"cited_by_count":9},{"year":2013,"cited_by_count":16},{"year":2012,"cited_by_count":11}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
