{"id":"https://openalex.org/W3145971282","doi":"https://doi.org/10.1109/isca.2012.6237038","title":"Can traditional programming bridge the Ninja performance gap for parallel computing applications?","display_name":"Can traditional programming bridge the Ninja performance gap for parallel computing applications?","publication_year":2012,"publication_date":"2012-06-01","ids":{"openalex":"https://openalex.org/W3145971282","doi":"https://doi.org/10.1109/isca.2012.6237038","mag":"3145971282"},"language":"en","primary_location":{"id":"doi:10.1109/isca.2012.6237038","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isca.2012.6237038","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2012 39th Annual International Symposium on Computer Architecture (ISCA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111985072","display_name":"Nadathur Satish","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Nadathur Satish","raw_affiliation_strings":["Parallel Computing Laboratory, Intel Corporation, USA"],"affiliations":[{"raw_affiliation_string":"Parallel Computing Laboratory, Intel Corporation, USA","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101667271","display_name":"Changkyu Kim","orcid":"https://orcid.org/0000-0002-0283-8371"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Changkyu Kim","raw_affiliation_strings":["Parallel Computing Laboratory, Intel Corporation, USA"],"affiliations":[{"raw_affiliation_string":"Parallel Computing Laboratory, Intel Corporation, USA","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059686266","display_name":"Jatin Chhugani","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jatin Chhugani","raw_affiliation_strings":["Parallel Computing Laboratory, Intel Corporation, USA"],"affiliations":[{"raw_affiliation_string":"Parallel Computing Laboratory, Intel Corporation, USA","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063368240","display_name":"Hideki Saito","orcid":"https://orcid.org/0009-0004-5529-7048"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hideki Saito","raw_affiliation_strings":["Intel Compiler Laboratory, Intel Corporation, USA"],"affiliations":[{"raw_affiliation_string":"Intel Compiler Laboratory, Intel Corporation, USA","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029238459","display_name":"Rakesh Krishnaiyer","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rakesh Krishnaiyer","raw_affiliation_strings":["Intel Compiler Laboratory, Intel Corporation, USA"],"affiliations":[{"raw_affiliation_string":"Intel Compiler Laboratory, Intel Corporation, USA","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007892541","display_name":"Mikhail Smelyanskiy","orcid":"https://orcid.org/0000-0002-2433-6110"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mikhail Smelyanskiy","raw_affiliation_strings":["Parallel Computing Laboratory, Intel Corporation, USA"],"affiliations":[{"raw_affiliation_string":"Parallel Computing Laboratory, Intel Corporation, USA","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009108433","display_name":"Milind Girkar","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Milind Girkar","raw_affiliation_strings":["Intel Compiler Laboratory, Intel Corporation, USA"],"affiliations":[{"raw_affiliation_string":"Intel Compiler Laboratory, Intel Corporation, USA","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032238070","display_name":"Pradeep Dubey","orcid":"https://orcid.org/0000-0001-5853-0619"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pradeep Dubey","raw_affiliation_strings":["Parallel Computing Laboratory, Intel Corporation, USA"],"affiliations":[{"raw_affiliation_string":"Parallel Computing Laboratory, Intel Corporation, USA","institution_ids":["https://openalex.org/I1343180700"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5111985072"],"corresponding_institution_ids":["https://openalex.org/I1343180700"],"apc_list":null,"apc_paid":null,"fwci":4.6409,"has_fulltext":false,"cited_by_count":36,"citation_normalized_percentile":{"value":0.95268878,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"440","last_page":"451"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8508247137069702},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.7436566352844238},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7374833822250366},{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.6741061806678772},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.599429726600647},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5976783037185669},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5634945631027222},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.5539379715919495},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.5071883201599121},{"id":"https://openalex.org/keywords/instruction-set","display_name":"Instruction set","score":0.4719609022140503},{"id":"https://openalex.org/keywords/bridge","display_name":"Bridge (graph theory)","score":0.46216344833374023},{"id":"https://openalex.org/keywords/programming-paradigm","display_name":"Programming paradigm","score":0.4488150477409363},{"id":"https://openalex.org/keywords/semantic-gap","display_name":"Semantic gap","score":0.4249640107154846},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3991943895816803},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.396350622177124},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.15128645300865173}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8508247137069702},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.7436566352844238},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7374833822250366},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.6741061806678772},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.599429726600647},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5976783037185669},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5634945631027222},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.5539379715919495},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.5071883201599121},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.4719609022140503},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.46216344833374023},{"id":"https://openalex.org/C34165917","wikidata":"https://www.wikidata.org/wiki/Q188267","display_name":"Programming paradigm","level":2,"score":0.4488150477409363},{"id":"https://openalex.org/C86034646","wikidata":"https://www.wikidata.org/wiki/Q474311","display_name":"Semantic gap","level":4,"score":0.4249640107154846},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3991943895816803},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.396350622177124},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.15128645300865173},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C126322002","wikidata":"https://www.wikidata.org/wiki/Q11180","display_name":"Internal medicine","level":1,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/isca.2012.6237038","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isca.2012.6237038","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2012 39th Annual International Symposium on Computer Architecture (ISCA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W1526547736","https://openalex.org/W1556859836","https://openalex.org/W1980322368","https://openalex.org/W1990800384","https://openalex.org/W1992851788","https://openalex.org/W2004714565","https://openalex.org/W2010509152","https://openalex.org/W2023636146","https://openalex.org/W2039378765","https://openalex.org/W2059710204","https://openalex.org/W2069881969","https://openalex.org/W2071208935","https://openalex.org/W2077791698","https://openalex.org/W2080560844","https://openalex.org/W2082695854","https://openalex.org/W2088401441","https://openalex.org/W2089064856","https://openalex.org/W2091434413","https://openalex.org/W2099404643","https://openalex.org/W2117202489","https://openalex.org/W2118031182","https://openalex.org/W2121082877","https://openalex.org/W2127324789","https://openalex.org/W2128859546","https://openalex.org/W2129232868","https://openalex.org/W2146558186","https://openalex.org/W2148038801","https://openalex.org/W2149743155","https://openalex.org/W2151224499","https://openalex.org/W2157144208","https://openalex.org/W2159481344","https://openalex.org/W2160523651","https://openalex.org/W2166955231","https://openalex.org/W2169875292","https://openalex.org/W2171296521","https://openalex.org/W2603663752","https://openalex.org/W4239698279","https://openalex.org/W4247263453","https://openalex.org/W6651250245","https://openalex.org/W6678286823","https://openalex.org/W6678821293","https://openalex.org/W6681144579","https://openalex.org/W6735820530"],"related_works":["https://openalex.org/W2766828645","https://openalex.org/W2994245508","https://openalex.org/W4242172182","https://openalex.org/W2082875307","https://openalex.org/W4237780868","https://openalex.org/W4285302443","https://openalex.org/W2019451907","https://openalex.org/W2127594310","https://openalex.org/W2137845512","https://openalex.org/W4236526691"],"abstract_inverted_index":{"Current":[0],"processor":[1],"trends":[2],"of":[3,67,76,102,132,146,179,199,232,280],"integrating":[4],"more":[5,23,264,287],"cores":[6,265],"with":[7,12,184],"wider":[8,267],"SIMD":[9],"units,":[10],"along":[11],"a":[13,130,152,177,286],"deeper":[14],"and":[15,47,62,71,80,88,122,164,235,266,284,289],"complex":[16],"memory":[17],"hierarchy,":[18],"have":[19],"made":[20],"it":[21],"increasingly":[22],"challenging":[24],"to":[25,38,43,149,196,211],"extract":[26],"performance":[27,90,109,291],"from":[28],"applications.":[29],"It":[30],"is":[31,107,117,141],"believed":[32],"by":[33],"some":[34],"that":[35,116,139,165,227,272,299],"traditional":[36,68],"approaches":[37],"programming":[39,69,207],"do":[40],"not":[41,304],"apply":[42],"these":[44,233],"modern":[45,126,187],"processors":[46,79],"hence":[48],"radical":[49,300],"new":[50],"languages":[51],"must":[52],"be":[53],"discovered.":[54],"In":[55],"this":[56,60,166],"paper,":[57],"we":[58,137,273],"question":[59],"thinking":[61],"offer":[63,285],"evidence":[64,298],"in":[65,84,186,216],"support":[66,224],"methods":[70],"the":[72,100,103,108,193,212,230,247,276,281],"performance-vs-programming":[73],"effort":[74,215],"effectiveness":[75],"common":[77],"multi-core":[78],"upcoming":[81,248],"manycore":[82],"architectures":[83],"delivering":[85],"significant":[86],"speedup,":[87],"close-to-optimal":[89],"for":[91,151,225,246],"commonly":[92],"used":[93],"parallel":[94],"computing":[95,135],"workloads.":[96],"We":[97,174,220,241,269],"first":[98],"quantify":[99],"extent":[101],"\u201cNinja":[104],"gap\u201d,":[105],"which":[106,262],"gap":[110,145,167,195,283],"between":[111],"naively":[112],"written":[113],"C/C++":[114],"code":[115,124],"parallelism":[118],"unaware":[119],"(often":[120],"serial)":[121],"best-optimized":[123],"on":[125],"multi-/many-core":[127],"processors.":[128],"Using":[129],"set":[131,178],"representative":[133],"throughput":[134],"benchmarks,":[136],"show":[138,175,242],"there":[140],"an":[142,197],"average":[143,198],"Ninja":[144,194,218,282],"24X":[147],"(up":[148],"53X)":[150],"recent":[153],"6-core":[154],"Intel":[155,249],"<sup":[156,250,258],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[157,251,259],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">\u00ae</sup>":[158,252,260],"Core\u2122":[159],"i7":[160],"X980":[161],"Westmere":[162],"CPU,":[163],"if":[168],"left":[169],"unaddressed":[170],"will":[171],"inevitably":[172],"increase.":[173],"how":[176],"well-known":[180],"algorithmic":[181],"changes":[182,203,234,302],"coupled":[183],"advancements":[185],"compiler":[188],"technology":[189],"can":[190,228,274],"bring":[191],"down":[192],"just":[200],"1.3X.":[201],"These":[202],"typically":[204],"require":[205],"low":[206],"effort,":[208],"as":[209],"compared":[210],"very":[213],"high":[214],"producing":[217],"code.":[219],"also":[221],"discuss":[222],"hardware":[223],"programmability":[226],"reduce":[229],"impact":[231],"even":[236],"further":[237],"increase":[238],"programmer":[239],"productivity.":[240],"equally":[243],"encouraging":[244],"results":[245],"Many":[253],"Integrated":[254],"Core":[255],"architecture":[256],"(Intel":[257],"MIC)":[261],"has":[263],"SIMD.":[268],"thus":[270],"demonstrate":[271],"contain":[275],"otherwise":[277],"uncontrolled":[278],"growth":[279,292],"stable":[288],"predictable":[290],"over":[293],"future":[294],"architectures,":[295],"offering":[296],"strong":[297],"language":[301],"are":[303],"required.":[305]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":5},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":6},{"year":2015,"cited_by_count":5},{"year":2014,"cited_by_count":2},{"year":2013,"cited_by_count":7},{"year":2012,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
