{"id":"https://openalex.org/W2078636996","doi":"https://doi.org/10.1145/2751205.2751240","title":"Quantifying Performance Bottlenecks of Stencil Computations Using the Execution-Cache-Memory Model","display_name":"Quantifying Performance Bottlenecks of Stencil Computations Using the Execution-Cache-Memory Model","publication_year":2015,"publication_date":"2015-06-02","ids":{"openalex":"https://openalex.org/W2078636996","doi":"https://doi.org/10.1145/2751205.2751240","mag":"2078636996"},"language":"en","primary_location":{"id":"doi:10.1145/2751205.2751240","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2751205.2751240","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM on International Conference on Supercomputing","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1410.5010","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Holger Stengel","orcid":null},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Holger Stengel","raw_affiliation_strings":["Friedrich-Alexander University of Erlangen-Nuremberg, Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"Friedrich-Alexander University of Erlangen-Nuremberg, Erlangen, Germany","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jan Treibig","orcid":null},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Jan Treibig","raw_affiliation_strings":["Friedrich-Alexander University of Erlangen-Nuremberg, Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"Friedrich-Alexander University of Erlangen-Nuremberg, Erlangen, Germany","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Georg Hager","orcid":null},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Georg Hager","raw_affiliation_strings":["Friedrich-Alexander University of Erlangen-Nuremberg, Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"Friedrich-Alexander University of Erlangen-Nuremberg, Erlangen, Germany","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"last","author":{"id":null,"display_name":"Gerhard Wellein","orcid":null},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Gerhard Wellein","raw_affiliation_strings":["Friedrich-Alexander University of Erlangen-Nuremberg, Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"Friedrich-Alexander University of Erlangen-Nuremberg, Erlangen, Germany","institution_ids":["https://openalex.org/I181369854"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I181369854"],"apc_list":null,"apc_paid":null,"fwci":9.2329,"has_fulltext":false,"cited_by_count":76,"citation_normalized_percentile":{"value":0.98298791,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"207","last_page":"216"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/stencil","display_name":"Stencil","score":0.9376000165939331},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7013000249862671},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5837000012397766},{"id":"https://openalex.org/keywords/blocking","display_name":"Blocking (statistics)","score":0.5616000294685364},{"id":"https://openalex.org/keywords/optimization-problem","display_name":"Optimization problem","score":0.3450999855995178},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.30880001187324524},{"id":"https://openalex.org/keywords/solver","display_name":"Solver","score":0.29789999127388}],"concepts":[{"id":"https://openalex.org/C76752949","wikidata":"https://www.wikidata.org/wiki/Q7607499","display_name":"Stencil","level":2,"score":0.9376000165939331},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7642999887466431},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7013000249862671},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5837000012397766},{"id":"https://openalex.org/C144745244","wikidata":"https://www.wikidata.org/wiki/Q4927286","display_name":"Blocking (statistics)","level":2,"score":0.5616000294685364},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.3450999855995178},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3400999903678894},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.33379998803138733},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.30880001187324524},{"id":"https://openalex.org/C2778770139","wikidata":"https://www.wikidata.org/wiki/Q1966904","display_name":"Solver","level":2,"score":0.29789999127388},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.296999990940094},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.29670000076293945},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.289900004863739},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.2831999957561493},{"id":"https://openalex.org/C2777115002","wikidata":"https://www.wikidata.org/wiki/Q7168246","display_name":"Performance prediction","level":2,"score":0.2809000015258789},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.2808000147342682},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.275299996137619},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.2685000002384186}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/2751205.2751240","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2751205.2751240","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM on International Conference on Supercomputing","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1410.5010","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1410.5010","pdf_url":"https://arxiv.org/pdf/1410.5010","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:repository.kaust.edu.sa:10754/679277","is_oa":false,"landing_page_url":"http://hdl.handle.net/10754/679277","pdf_url":null,"source":{"id":"https://openalex.org/S4306401596","display_name":"King Abdullah University of Science and Technology Repository (King Abdullah University of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I71920554","host_organization_name":"King Abdullah University of Science and Technology","host_organization_lineage":["https://openalex.org/I71920554"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference Paper"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1410.5010","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1410.5010","pdf_url":"https://arxiv.org/pdf/1410.5010","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W1480947737","https://openalex.org/W1961751213","https://openalex.org/W1997147891","https://openalex.org/W2002555321","https://openalex.org/W2008005532","https://openalex.org/W2057577013","https://openalex.org/W2091545779","https://openalex.org/W2125623590","https://openalex.org/W2128544095"],"related_works":[],"abstract_inverted_index":{"Stencil":[0],"algorithms":[1,74],"on":[2,49,75],"regular":[3],"lattices":[4],"appear":[5],"in":[6],"many":[7],"fields":[8],"of":[9,33,72,110,129],"computational":[10],"science,":[11],"and":[12,40,64,90,125,139],"much":[13],"effort":[14],"has":[15],"been":[16],"put":[17],"into":[18],"optimized":[19],"implementations.":[20],"Such":[21],"activities":[22],"are":[23,114],"usually":[24],"not":[25],"guided":[26],"by":[27,42,101],"performance":[28,38,43,70,89],"models":[29],"that":[30],"provide":[31],"estimates":[32],"expected":[34,144],"speedup.":[35],"Understanding":[36],"the":[37,58,69,83,102,108,118,122,127,149,153],"properties":[39],"bottlenecks":[41,71],"modeling":[44],"enables":[45],"a":[46,76],"clear":[47],"view":[48],"promising":[50],"optimization":[51,131],"opportunities.":[52],"In":[53],"this":[54],"work":[55],"we":[56,105],"refine":[57],"recently":[59],"developed":[60],"Execution-Cache-Memory":[61],"(ECM)":[62],"model":[63,84,104,151],"use":[65],"it":[66],"to":[67,85,116,152],"quantify":[68,107],"stencil":[73,97],"contemporary":[77],"Intel":[78],"processor.":[79],"This":[80],"includes":[81],"applying":[82],"arrive":[86],"at":[87],"single-core":[88],"scalability":[91],"predictions":[92],"for":[93,142],"typical":[94,130],"\"corner":[95],"case\"":[96],"loop":[98],"kernels.":[99],"Guided":[100],"ECM":[103,150],"accurately":[106],"significance":[109],"\"layer":[111],"conditions,\"":[112],"which":[113],"required":[115],"estimate":[117],"data":[119],"traffic":[120],"through":[121],"memory":[123],"hierarchy,":[124],"study":[126],"impact":[128],"approaches":[132],"such":[133],"as":[134],"spatial":[135],"blocking,":[136],"strength":[137],"reduction,":[138],"temporal":[140],"blocking":[141],"their":[143],"benefits.":[145],"We":[146],"also":[147],"compare":[148],"widely":[154],"known":[155],"Roofline":[156],"model.":[157]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":12},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":9},{"year":2019,"cited_by_count":8},{"year":2018,"cited_by_count":7},{"year":2017,"cited_by_count":9},{"year":2016,"cited_by_count":10},{"year":2015,"cited_by_count":2}],"updated_date":"2026-04-13T07:58:08.660418","created_date":"2016-06-24T00:00:00"}
