{"id":"https://openalex.org/W2135682468","doi":"https://doi.org/10.1109/ipdps.2010.5470421","title":"An auto-tuning framework for parallel multicore stencil computations","display_name":"An auto-tuning framework for parallel multicore stencil computations","publication_year":2010,"publication_date":"2010-01-01","ids":{"openalex":"https://openalex.org/W2135682468","doi":"https://doi.org/10.1109/ipdps.2010.5470421","mag":"2135682468"},"language":"en","primary_location":{"id":"doi:10.1109/ipdps.2010.5470421","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipdps.2010.5470421","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2010 IEEE International Symposium on Parallel &amp; Distributed Processing (IPDPS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://escholarship.org/uc/item/3nv8w3kw","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5039453861","display_name":"Shoaib Kamil","orcid":"https://orcid.org/0000-0001-5965-3717"},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]},{"id":"https://openalex.org/I4210151627","display_name":"National Energy Research Scientific Computing Center","ror":"https://ror.org/05v3mvq14","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521","https://openalex.org/I4210151627"]},{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Shoaib Kamil","raw_affiliation_strings":["CRD NERSC, Lawrence Berkeley National Laboratory, Berkeley, CA, USA","EECS Department, University of California, Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"CRD NERSC, Lawrence Berkeley National Laboratory, Berkeley, CA, USA","institution_ids":["https://openalex.org/I148283060","https://openalex.org/I4210151627"]},{"raw_affiliation_string":"EECS Department, University of California, Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005140361","display_name":"Cy Chan","orcid":"https://orcid.org/0000-0001-6881-827X"},"institutions":[{"id":"https://openalex.org/I4210151627","display_name":"National Energy Research Scientific Computing Center","ror":"https://ror.org/05v3mvq14","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521","https://openalex.org/I4210151627"]},{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]},{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Cy Chan","raw_affiliation_strings":["CRD NERSC, Lawrence Berkeley National Laboratory, Berkeley, CA, USA","Massachusetts Institute of Technology, Cambridge, MA, USA"],"affiliations":[{"raw_affiliation_string":"CRD NERSC, Lawrence Berkeley National Laboratory, Berkeley, CA, USA","institution_ids":["https://openalex.org/I148283060","https://openalex.org/I4210151627"]},{"raw_affiliation_string":"Massachusetts Institute of Technology, Cambridge, MA, USA","institution_ids":["https://openalex.org/I63966007"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113842004","display_name":"Leonid Oliker","orcid":"https://orcid.org/0000-0002-7923-2896"},"institutions":[{"id":"https://openalex.org/I4210151627","display_name":"National Energy Research Scientific Computing Center","ror":"https://ror.org/05v3mvq14","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521","https://openalex.org/I4210151627"]},{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Leonid Oliker","raw_affiliation_strings":["CRD NERSC, Lawrence Berkeley National Laboratory, Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"CRD NERSC, Lawrence Berkeley National Laboratory, Berkeley, CA, USA","institution_ids":["https://openalex.org/I148283060","https://openalex.org/I4210151627"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010873686","display_name":"John Shalf","orcid":"https://orcid.org/0000-0002-0608-3690"},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]},{"id":"https://openalex.org/I4210151627","display_name":"National Energy Research Scientific Computing Center","ror":"https://ror.org/05v3mvq14","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521","https://openalex.org/I4210151627"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"John Shalf","raw_affiliation_strings":["CRD NERSC, Lawrence Berkeley National Laboratory, Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"CRD NERSC, Lawrence Berkeley National Laboratory, Berkeley, CA, USA","institution_ids":["https://openalex.org/I148283060","https://openalex.org/I4210151627"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102746800","display_name":"Samuel Williams","orcid":"https://orcid.org/0000-0002-8327-5717"},"institutions":[{"id":"https://openalex.org/I4210151627","display_name":"National Energy Research Scientific Computing Center","ror":"https://ror.org/05v3mvq14","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521","https://openalex.org/I4210151627"]},{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Samuel Williams","raw_affiliation_strings":["CRD NERSC, Lawrence Berkeley National Laboratory, Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"CRD NERSC, Lawrence Berkeley National Laboratory, Berkeley, CA, USA","institution_ids":["https://openalex.org/I148283060","https://openalex.org/I4210151627"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5039453861"],"corresponding_institution_ids":["https://openalex.org/I148283060","https://openalex.org/I4210151627","https://openalex.org/I95457486"],"apc_list":null,"apc_paid":null,"fwci":19.2045,"has_fulltext":true,"cited_by_count":212,"citation_normalized_percentile":{"value":0.99507023,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"12"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9914000034332275,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/stencil","display_name":"Stencil","score":0.9522050619125366},{"id":"https://openalex.org/keywords/software-portability","display_name":"Software portability","score":0.870568037033081},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8263821601867676},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7449702620506287},{"id":"https://openalex.org/keywords/programmer","display_name":"Programmer","score":0.7238181829452515},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6924494504928589},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.591309130191803},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.590647280216217},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.5703895092010498},{"id":"https://openalex.org/keywords/fortran","display_name":"Fortran","score":0.49395400285720825},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.48619574308395386},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.4818195402622223},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.4471853971481323},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.28211236000061035},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.26095739006996155},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.22479429841041565},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.1723516285419464}],"concepts":[{"id":"https://openalex.org/C76752949","wikidata":"https://www.wikidata.org/wiki/Q7607499","display_name":"Stencil","level":2,"score":0.9522050619125366},{"id":"https://openalex.org/C63000827","wikidata":"https://www.wikidata.org/wiki/Q3080428","display_name":"Software portability","level":2,"score":0.870568037033081},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8263821601867676},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7449702620506287},{"id":"https://openalex.org/C2778514511","wikidata":"https://www.wikidata.org/wiki/Q1374194","display_name":"Programmer","level":2,"score":0.7238181829452515},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6924494504928589},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.591309130191803},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.590647280216217},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.5703895092010498},{"id":"https://openalex.org/C2778241615","wikidata":"https://www.wikidata.org/wiki/Q83303","display_name":"Fortran","level":2,"score":0.49395400285720825},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.48619574308395386},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.4818195402622223},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.4471853971481323},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.28211236000061035},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.26095739006996155},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.22479429841041565},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.1723516285419464},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/ipdps.2010.5470421","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipdps.2010.5470421","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2010 IEEE International Symposium on Parallel &amp; Distributed Processing (IPDPS)","raw_type":"proceedings-article"},{"id":"pmh:oai:escholarship.org:ark:/13030/qt3nv8w3kw","is_oa":true,"landing_page_url":"https://escholarship.org/uc/item/3nv8w3kw","pdf_url":"https://escholarship.org/uc/item/3nv8w3kw","source":{"id":"https://openalex.org/S4306400115","display_name":"eScholarship (California Digital Library)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2801248553","host_organization_name":"California Digital Library","host_organization_lineage":["https://openalex.org/I2801248553"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.188.9104","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.188.9104","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://people.csail.mit.edu/cychan/papers/ipdps10.pdf","raw_type":"text"},{"id":"pmh:qt3nv8w3kw","is_oa":false,"landing_page_url":"http://www.escholarship.org/uc/item/3nv8w3kw","pdf_url":null,"source":{"id":"https://openalex.org/S4306400115","display_name":"eScholarship (California Digital Library)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2801248553","host_organization_name":"California Digital Library","host_organization_lineage":["https://openalex.org/I2801248553"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Kamil, S; Chan, C; Oliker, L; Shall, J; &amp; Williams, S. (2010). An auto-tuning framework for parallel multicore stencil computations. Proceedings of the 2010 IEEE International Symposium on Parallel and Distributed Processing, IPDPS 2010. doi: 10.1109/IPDPS.2010.5470421. Lawrence Berkeley National Laboratory: Lawrence Berkeley National Laboratory. Retrieved from: http://www.escholarship.org/uc/item/3nv8w3kw","raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:escholarship.org:ark:/13030/qt3nv8w3kw","is_oa":true,"landing_page_url":"https://escholarship.org/uc/item/3nv8w3kw","pdf_url":"https://escholarship.org/uc/item/3nv8w3kw","source":{"id":"https://openalex.org/S4306400115","display_name":"eScholarship (California Digital Library)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2801248553","host_organization_name":"California Digital Library","host_organization_lineage":["https://openalex.org/I2801248553"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320309480","display_name":"Nvidia","ror":"https://ror.org/03jdj4y14"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2135682468.pdf","grobid_xml":"https://content.openalex.org/works/W2135682468.grobid-xml"},"referenced_works_count":29,"referenced_works":["https://openalex.org/W1519701134","https://openalex.org/W1551023754","https://openalex.org/W1964031104","https://openalex.org/W1997147891","https://openalex.org/W2002555321","https://openalex.org/W2034761517","https://openalex.org/W2056971515","https://openalex.org/W2076848169","https://openalex.org/W2099625934","https://openalex.org/W2119042753","https://openalex.org/W2121082877","https://openalex.org/W2129471558","https://openalex.org/W2136952590","https://openalex.org/W2148038801","https://openalex.org/W2152914333","https://openalex.org/W2154786353","https://openalex.org/W2164197394","https://openalex.org/W2175606803","https://openalex.org/W2293033107","https://openalex.org/W2413556102","https://openalex.org/W2461867464","https://openalex.org/W3004184795","https://openalex.org/W3151489216","https://openalex.org/W3152199537","https://openalex.org/W3203568064","https://openalex.org/W4249968602","https://openalex.org/W6631277133","https://openalex.org/W6678286823","https://openalex.org/W6718912861"],"related_works":["https://openalex.org/W3105129168","https://openalex.org/W2804920739","https://openalex.org/W4316371992","https://openalex.org/W2186216222","https://openalex.org/W3020739840","https://openalex.org/W2913998709","https://openalex.org/W4386875822","https://openalex.org/W3177128669","https://openalex.org/W2045320366","https://openalex.org/W2093559318"],"abstract_inverted_index":{"Although":[0],"stencil":[1,27,46,62],"auto-tuning":[2,47],"has":[3,13],"shown":[4],"tremendous":[5],"potential":[6],"in":[7,21,30,68],"effectively":[8],"utilizing":[9],"architectural":[10,126],"resources,":[11],"it":[12],"hitherto":[14],"been":[15],"limited":[16],"to":[17,37,107],"single":[18],"kernel":[19],"instantiations;":[20],"addition,":[22],"the":[23,82,91,111],"large":[24],"variety":[25],"of":[26,105],"kernels":[28],"used":[29],"practice":[31],"makes":[32],"this":[33],"computation":[34],"pattern":[35],"difficult":[36],"assemble":[38],"into":[39,64],"a":[40,45,57],"library.":[41],"This":[42],"work":[43],"presents":[44],"framework":[48],"that":[49,97,118],"significantly":[50],"advances":[51],"programmer":[52,128],"productivity":[53],"by":[54],"automatically":[55],"converting":[56],"straightforward":[58],"sequential":[59],"Fortran":[60],"95":[61],"expression":[63],"tuned":[65],"parallel":[66],"implementations":[67],"Fortran,":[69],"C,":[70],"or":[71],"CUDA,":[72],"thus":[73],"allowing":[74],"performance":[75,103,130],"portability":[76],"across":[77],"diverse":[78],"computer":[79],"architectures,":[80],"including":[81],"AMD":[83],"Barcelona,":[84],"Intel":[85],"Nehalem,":[86],"Sun":[87],"Victoria":[88],"Falls,":[89],"and":[90,132,137],"latest":[92],"NVIDIA":[93],"GPUs.":[94],"Results":[95],"show":[96],"our":[98],"generalized":[99],"methodology":[100],"delivers":[101],"significant":[102],"gains":[104],"up":[106],"22\u00d7":[108],"speedup":[109],"over":[110],"reference":[112],"serial":[113],"implementation.":[114],"Overall":[115],"we":[116],"demonstrate":[117],"such":[119],"domain-specific":[120],"auto-tuners":[121],"hold":[122],"enormous":[123],"promise":[124],"for":[125],"efficiency,":[127],"productivity,":[129],"portability,":[131],"algorithmic":[133],"adaptability":[134],"on":[135],"existing":[136],"emerging":[138],"multicore":[139],"systems.":[140]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":9},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":11},{"year":2018,"cited_by_count":14},{"year":2017,"cited_by_count":17},{"year":2016,"cited_by_count":24},{"year":2015,"cited_by_count":18},{"year":2014,"cited_by_count":28},{"year":2013,"cited_by_count":24},{"year":2012,"cited_by_count":22}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
