{"id":"https://openalex.org/W2199084870","doi":"https://doi.org/10.1145/3155290","title":"Multidimensional Intratile Parallelization for Memory-Starved Stencil Computations","display_name":"Multidimensional Intratile Parallelization for Memory-Starved Stencil Computations","publication_year":2017,"publication_date":"2017-09-30","ids":{"openalex":"https://openalex.org/W2199084870","doi":"https://doi.org/10.1145/3155290","mag":"2199084870"},"language":"en","primary_location":{"id":"doi:10.1145/3155290","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3155290","pdf_url":null,"source":{"id":"https://openalex.org/S2483380313","display_name":"ACM Transactions on Parallel Computing","issn_l":"2329-4949","issn":["2329-4949","2329-4957"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Parallel Computing","raw_type":"journal-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1510.04995","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Tareq M. Malas","orcid":null},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]},{"id":"https://openalex.org/I4210151627","display_name":"National Energy Research Scientific Computing Center","ror":"https://ror.org/05v3mvq14","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521","https://openalex.org/I4210151627"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Tareq M. Malas","raw_affiliation_strings":["National Energy Research Scientific Computing Center, Lawrence Berkeley National Laboratory"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Energy Research Scientific Computing Center, Lawrence Berkeley National Laboratory","institution_ids":["https://openalex.org/I4210151627","https://openalex.org/I148283060"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082552227","display_name":"Georg Hager","orcid":"https://orcid.org/0000-0002-8723-2781"},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Georg Hager","raw_affiliation_strings":["Erlangen Regional Computing Center (RRZE), Friedrich-Alexander University of Erlangen-Nuremberg, Erlangen, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Erlangen Regional Computing Center (RRZE), Friedrich-Alexander University of Erlangen-Nuremberg, Erlangen, Germany","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017526753","display_name":"Hatem Ltaief","orcid":"https://orcid.org/0000-0002-6897-1095"},"institutions":[{"id":"https://openalex.org/I71920554","display_name":"King Abdullah University of Science and Technology","ror":"https://ror.org/01q3tbs38","country_code":"SA","type":"education","lineage":["https://openalex.org/I71920554"]}],"countries":["SA"],"is_corresponding":false,"raw_author_name":"Hatem Ltaief","raw_affiliation_strings":["Extreme Computing Research Center (ECRC), King Abdullah University of Science and Technology, Thuwal, Saudi Arabia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Extreme Computing Research Center (ECRC), King Abdullah University of Science and Technology, Thuwal, Saudi Arabia","institution_ids":["https://openalex.org/I71920554"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5021283893","display_name":"David E. Keyes","orcid":"https://orcid.org/0000-0002-4052-7224"},"institutions":[{"id":"https://openalex.org/I71920554","display_name":"King Abdullah University of Science and Technology","ror":"https://ror.org/01q3tbs38","country_code":"SA","type":"education","lineage":["https://openalex.org/I71920554"]}],"countries":["SA"],"is_corresponding":false,"raw_author_name":"David E. Keyes","raw_affiliation_strings":["ECRC, King Abdullah University of Science and Technology, Thuwal, Saudi Arabia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ECRC, King Abdullah University of Science and Technology, Thuwal, Saudi Arabia","institution_ids":["https://openalex.org/I71920554"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I148283060","https://openalex.org/I4210151627"],"apc_list":null,"apc_paid":null,"fwci":0.9244,"has_fulltext":true,"cited_by_count":6,"citation_normalized_percentile":{"value":0.72983871,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"4","issue":"3","first_page":"1","last_page":"32"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/stencil","display_name":"Stencil","score":0.9188019037246704},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.8700866103172302},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8194319605827332},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.6692168712615967},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.4141589105129242},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.4135197103023529},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.24126183986663818}],"concepts":[{"id":"https://openalex.org/C76752949","wikidata":"https://www.wikidata.org/wiki/Q7607499","display_name":"Stencil","level":2,"score":0.9188019037246704},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.8700866103172302},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8194319605827332},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.6692168712615967},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.4141589105129242},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.4135197103023529},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.24126183986663818},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1145/3155290","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3155290","pdf_url":null,"source":{"id":"https://openalex.org/S2483380313","display_name":"ACM Transactions on Parallel Computing","issn_l":"2329-4949","issn":["2329-4949","2329-4957"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Parallel Computing","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:1510.04995","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1510.04995","pdf_url":"https://arxiv.org/pdf/1510.04995","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2199084870","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1510.04995.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"pmh:oai:repository.kaust.edu.sa:10754/631616","is_oa":false,"landing_page_url":"http://hdl.handle.net/10754/631616","pdf_url":null,"source":{"id":"https://openalex.org/S4306401596","display_name":"King Abdullah University of Science and Technology Repository (King Abdullah University of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I71920554","host_organization_name":"King Abdullah University of Science and Technology","host_organization_lineage":["https://openalex.org/I71920554"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"},{"id":"doi:10.48550/arxiv.1510.04995","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1510.04995","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1510.04995","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1510.04995","pdf_url":"https://arxiv.org/pdf/1510.04995","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"score":0.9100000262260437,"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320322320","display_name":"King Abdullah University of Science and Technology","ror":"https://ror.org/01q3tbs38"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2199084870.pdf","grobid_xml":"https://content.openalex.org/works/W2199084870.grobid-xml"},"referenced_works_count":43,"referenced_works":["https://openalex.org/W20702786","https://openalex.org/W149681167","https://openalex.org/W1480947737","https://openalex.org/W1506424797","https://openalex.org/W1529356657","https://openalex.org/W1628605343","https://openalex.org/W1650934639","https://openalex.org/W1968013322","https://openalex.org/W1973271197","https://openalex.org/W1973532523","https://openalex.org/W1979457157","https://openalex.org/W1997147891","https://openalex.org/W2002555321","https://openalex.org/W2008436719","https://openalex.org/W2038454316","https://openalex.org/W2039378765","https://openalex.org/W2057577013","https://openalex.org/W2059882081","https://openalex.org/W2066139250","https://openalex.org/W2070961300","https://openalex.org/W2076780324","https://openalex.org/W2078636996","https://openalex.org/W2092602356","https://openalex.org/W2095875205","https://openalex.org/W2096642134","https://openalex.org/W2097757554","https://openalex.org/W2104512032","https://openalex.org/W2107725926","https://openalex.org/W2121082877","https://openalex.org/W2131218797","https://openalex.org/W2148038801","https://openalex.org/W2150319905","https://openalex.org/W2151764765","https://openalex.org/W2153768689","https://openalex.org/W2164890169","https://openalex.org/W2166622045","https://openalex.org/W2953368509","https://openalex.org/W2962978274","https://openalex.org/W2963799023","https://openalex.org/W2964101311","https://openalex.org/W3105621696","https://openalex.org/W3106055984","https://openalex.org/W3151489216"],"related_works":["https://openalex.org/W2962978274","https://openalex.org/W2080166316","https://openalex.org/W580168354","https://openalex.org/W2809015808","https://openalex.org/W2306912361","https://openalex.org/W2044045718","https://openalex.org/W2247651031","https://openalex.org/W2770391629","https://openalex.org/W2006471267","https://openalex.org/W2016414291","https://openalex.org/W2003108847","https://openalex.org/W2344019149","https://openalex.org/W637797136","https://openalex.org/W2604839371","https://openalex.org/W2731078336","https://openalex.org/W2778090897","https://openalex.org/W2062213698","https://openalex.org/W3209704753","https://openalex.org/W2159764943","https://openalex.org/W2002932832"],"abstract_inverted_index":{"Optimizing":[0],"the":[1,8,14,31,38,42,47,65,83,126,151,166,210,258],"performance":[2,156,188,211,219,242,268],"of":[3,10,50,69,182,223,260,267],"stencil":[4,20,108,168,176],"algorithms":[5,109],"has":[6],"been":[7],"subject":[9],"intense":[11],"research":[12],"over":[13],"last":[15],"two":[16,159],"decades.":[17],"Since":[18],"many":[19],"schemes":[21,177],"have":[22],"low":[23],"arithmetic":[24,192],"intensity,":[25],"most":[26],"optimizations":[27],"focus":[28],"on":[29,77,110,150,158,200],"increasing":[30],"temporal":[32,93],"data":[33,39,261],"access":[34],"locality,":[35],"thus":[36,246],"reducing":[37],"traffic":[40],"through":[41,233],"main":[43],"memory":[44],"interface":[45],"with":[46,113,165,203],"ultimate":[48],"goal":[49],"decoupling":[51],"from":[52,133],"this":[53],"bottleneck.":[54],"There":[55],"are,":[56],"however,":[57],"only":[58],"a":[59,101,114,122,179,240],"few":[60],"approaches":[61],"that":[62,227,252],"explicitly":[63],"leverage":[64],"shared":[66,115],"cache":[67,81,85,128],"feature":[68],"modern":[70],"multicore":[71,111],"chips.":[72],"If":[73],"every":[74],"thread":[75],"works":[76],"its":[78],"private,":[79],"separate":[80],"block,":[82],"available":[84],"space":[86,129],"can":[87,230],"become":[88],"too":[89],"small,":[90],"and":[91,163,171,178,190],"sufficient":[92],"locality":[94],"may":[95],"not":[96],"be":[97,254,263],"achieved.":[98],"We":[99,154,206],"propose":[100],"flexible":[102],"multidimensional":[103],"intratile":[104],"parallelization":[105],"method":[106,119,229],"for":[107,249],"CPUs":[112],"outer-level":[116],"cache.":[117],"This":[118],"leads":[120],"to":[121,145],"significant":[123],"reduction":[124],"in":[125,208,265],"required":[127],"without":[130],"adverse":[131],"effects":[132],"hardware":[134],"prefetching":[135],"or":[136,269],"TLB":[137],"shortage.":[138],"Our":[139,221],"Girih":[140,185],"framework":[141],"includes":[142],"an":[143],"autotuner":[144],"select":[146],"optimal":[147],"parameter":[148],"configurations":[149],"target":[152],"hardware.":[153],"conduct":[155],"experiments":[157],"contemporary":[160],"Intel":[161],"processors":[162],"compare":[164],"state-of-the-art":[167],"frameworks":[169],"Pluto":[170],"Pochoir,":[172],"using":[173,217],"four":[174],"corner-case":[175],"wide":[180],"range":[181],"problem":[183,197],"sizes.":[184],"shows":[186],"substantial":[187],"advantages":[189],"best":[191],"intensity":[193],"at":[194,213,239],"almost":[195],"all":[196],"sizes,":[198],"especially":[199],"low-intensity":[201],"stencils":[202],"variable":[204],"coefficients.":[205],"study":[207],"detail":[209],"behavior":[212],"varying":[214],"grid":[215],"sizes":[216],"phenomenological":[218],"modeling.":[220],"analysis":[222],"energy":[224,232,270],"consumption":[225],"reveals":[226],"our":[228],"save":[231],"reduced":[234],"DRAM":[235],"bandwidth":[236],"usage":[237],"even":[238],"marginal":[241],"gain.":[243],"It":[244],"is":[245],"well":[247],"suited":[248],"future":[250],"architectures":[251],"will":[253],"strongly":[255],"challenged":[256],"by":[257],"cost":[259],"movement,":[262],"it":[264],"terms":[266],"consumption.":[271]},"counts_by_year":[{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":2}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
