{"id":"https://openalex.org/W2025093669","doi":"https://doi.org/10.1145/2541228.2555292","title":"Tile size selection revisited","display_name":"Tile size selection revisited","publication_year":2013,"publication_date":"2013-12-01","ids":{"openalex":"https://openalex.org/W2025093669","doi":"https://doi.org/10.1145/2541228.2555292","mag":"2025093669"},"language":"en","primary_location":{"id":"doi:10.1145/2541228.2555292","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2541228.2555292","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/2541228.2555292","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/2541228.2555292","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102742730","display_name":"Sanyam Mehta","orcid":"https://orcid.org/0009-0005-5319-689X"},"institutions":[{"id":"https://openalex.org/I2800403580","display_name":"University of Minnesota System","ror":"https://ror.org/03grvy078","country_code":"US","type":"education","lineage":["https://openalex.org/I2800403580"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Sanyam Mehta","raw_affiliation_strings":["University of Minnesota"],"affiliations":[{"raw_affiliation_string":"University of Minnesota","institution_ids":["https://openalex.org/I2800403580"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032540349","display_name":"Gautham Beeraka","orcid":null},"institutions":[{"id":"https://openalex.org/I2800403580","display_name":"University of Minnesota System","ror":"https://ror.org/03grvy078","country_code":"US","type":"education","lineage":["https://openalex.org/I2800403580"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gautham Beeraka","raw_affiliation_strings":["University of Minnesota"],"affiliations":[{"raw_affiliation_string":"University of Minnesota","institution_ids":["https://openalex.org/I2800403580"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5052005800","display_name":"Pen-Chung Yew","orcid":"https://orcid.org/0000-0001-9653-8777"},"institutions":[{"id":"https://openalex.org/I2800403580","display_name":"University of Minnesota System","ror":"https://ror.org/03grvy078","country_code":"US","type":"education","lineage":["https://openalex.org/I2800403580"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pen-Chung Yew","raw_affiliation_strings":["University of Minnesota"],"affiliations":[{"raw_affiliation_string":"University of Minnesota","institution_ids":["https://openalex.org/I2800403580"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5102742730"],"corresponding_institution_ids":["https://openalex.org/I2800403580"],"apc_list":null,"apc_paid":null,"fwci":1.9029,"has_fulltext":true,"cited_by_count":32,"citation_normalized_percentile":{"value":0.85713055,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"10","issue":"4","first_page":"1","last_page":"27"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9929999709129333,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8317196369171143},{"id":"https://openalex.org/keywords/tile","display_name":"Tile","score":0.7790002226829529},{"id":"https://openalex.org/keywords/loop-tiling","display_name":"Loop tiling","score":0.6773619055747986},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.6324697136878967},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6237536072731018},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5851907730102539},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.5481264591217041},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.48101434111595154},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.4560624659061432},{"id":"https://openalex.org/keywords/stencil","display_name":"Stencil","score":0.437996506690979},{"id":"https://openalex.org/keywords/transformation","display_name":"Transformation (genetics)","score":0.4359554648399353},{"id":"https://openalex.org/keywords/optimizing-compiler","display_name":"Optimizing compiler","score":0.42857831716537476},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.12531134486198425},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.08648058772087097}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8317196369171143},{"id":"https://openalex.org/C2780728851","wikidata":"https://www.wikidata.org/wiki/Q468402","display_name":"Tile","level":2,"score":0.7790002226829529},{"id":"https://openalex.org/C11799548","wikidata":"https://www.wikidata.org/wiki/Q6675847","display_name":"Loop tiling","level":3,"score":0.6773619055747986},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.6324697136878967},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6237536072731018},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5851907730102539},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.5481264591217041},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.48101434111595154},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.4560624659061432},{"id":"https://openalex.org/C76752949","wikidata":"https://www.wikidata.org/wiki/Q7607499","display_name":"Stencil","level":2,"score":0.437996506690979},{"id":"https://openalex.org/C204241405","wikidata":"https://www.wikidata.org/wiki/Q461499","display_name":"Transformation (genetics)","level":3,"score":0.4359554648399353},{"id":"https://openalex.org/C190902152","wikidata":"https://www.wikidata.org/wiki/Q1325106","display_name":"Optimizing compiler","level":3,"score":0.42857831716537476},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.12531134486198425},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.08648058772087097},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2541228.2555292","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2541228.2555292","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/2541228.2555292","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/2541228.2555292","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2541228.2555292","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/2541228.2555292","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8662217532","display_name":"CRI: IAD  Exploiting Multicore Processor Technology for Interactive Supercomputing","funder_award_id":"0708822","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2025093669.pdf","grobid_xml":"https://content.openalex.org/works/W2025093669.grobid-xml"},"referenced_works_count":40,"referenced_works":["https://openalex.org/W38784877","https://openalex.org/W243738321","https://openalex.org/W1502340801","https://openalex.org/W1561247983","https://openalex.org/W1562841074","https://openalex.org/W1843640069","https://openalex.org/W1964031104","https://openalex.org/W1972209410","https://openalex.org/W1973923211","https://openalex.org/W1991338294","https://openalex.org/W2002257715","https://openalex.org/W2007451249","https://openalex.org/W2020505145","https://openalex.org/W2034761517","https://openalex.org/W2056304566","https://openalex.org/W2063249715","https://openalex.org/W2065705265","https://openalex.org/W2072277531","https://openalex.org/W2076848169","https://openalex.org/W2078845975","https://openalex.org/W2084379367","https://openalex.org/W2098220211","https://openalex.org/W2099059741","https://openalex.org/W2108737583","https://openalex.org/W2116493917","https://openalex.org/W2119609467","https://openalex.org/W2123680107","https://openalex.org/W2136628731","https://openalex.org/W2142252451","https://openalex.org/W2148038801","https://openalex.org/W2158158791","https://openalex.org/W2158626113","https://openalex.org/W2159309967","https://openalex.org/W2164197394","https://openalex.org/W2172191698","https://openalex.org/W2803835569","https://openalex.org/W3203568064","https://openalex.org/W4240791849","https://openalex.org/W4243796884","https://openalex.org/W4248073216"],"related_works":["https://openalex.org/W2014071052","https://openalex.org/W2244841219","https://openalex.org/W2173545083","https://openalex.org/W2809483446","https://openalex.org/W2055182305","https://openalex.org/W4256622436","https://openalex.org/W1549990549","https://openalex.org/W4247745956","https://openalex.org/W4249968602","https://openalex.org/W2119042753"],"abstract_inverted_index":{"Loop":[0],"tiling":[1,149],"is":[2,39,260],"a":[3,70,97,108,177,192,221],"widely":[4],"used":[5],"loop":[6],"transformation":[7],"to":[8,27,41,81,125,262],"enhance":[9],"data":[10,14,135,219],"locality":[11],"and":[12,59,78,195,226,234],"allow":[13],"reuse.":[15],"In":[16,46,103,142,248],"the":[17,47,74,86,89,118,146,151,159,210,238,244,250,263,273,277],"tiled":[18,44,101],"code,":[19],"however,":[20],"tiles":[21,242],"of":[22,34,43,76,100,140,148,224,232],"different":[23,204],"sizes":[24],"can":[25],"lead":[26],"significant":[28],"variation":[29],"in":[30,96,122,137,154,157,170,175,191,220],"performance.":[31],"Thus,":[32],"selection":[33,51,115,132,189,258],"an":[35,93,228,269],"optimal":[36,160],"tile":[37,49,113,130,161,181,187,251,256],"size":[38,50,114,131,182,188,252,257,266],"critical":[40,174],"performance":[42,230],"codes.":[45,102],"past,":[48],"has":[52],"been":[53],"attempted":[54],"using":[55,66,202],"both":[56],"static":[57,67],"analytical":[58,110,212,274],"dynamic":[60],"empirical":[61],"(auto-tuning)":[62],"models.":[63],"Past":[64],"work":[65],"models":[68,91,213],"assumed":[69],"direct-mapped":[71],"cache":[72,225],"for":[73,112,180,243],"purpose":[75],"analysis":[77],"thus":[79],"proved":[80],"be":[82],"less":[83],"robust.":[84],"On":[85],"other":[87],"hand,":[88],"auto-tuning":[90],"involve":[92],"exhaustive":[94],"search":[95],"large":[98],"space":[99],"this":[104],"article,":[105],"we":[106],"propose":[107],"new":[109],"model":[111,133,179,190,208,275],"that":[116,165,214],"leverages":[117],"high":[119],"set":[120],"associativity":[121],"modern":[123,155],"caches":[124],"minimize":[126],"conflict":[127],"misses.":[128],"Our":[129,207],"targets":[134],"reuse":[136],"multiple":[138],"levels":[139],"cache.":[141],"addition,":[143,249],"it":[144,197],"considers":[145],"interaction":[147],"with":[150],"SIMD":[152],"unit":[153],"processors":[156],"estimating":[158],"size.":[162],"We":[163,184],"find":[164],"these":[166],"factors,":[167],"not":[168],"considered":[169],"previous":[171,211],"models,":[172],"are":[173,215],"developing":[176],"robust":[178],"selection.":[183],"implement":[185],"our":[186,255],"polyhedral":[193],"compiler":[194],"test":[196],"on":[198,217],"12":[199],"benchmark":[200],"kernels":[201],"two":[203,245],"problem":[205,246],"sizes.":[206,247],"outperforms":[209],"based":[216],"reusing":[218],"single":[222],"level":[223],"achieves":[227],"average":[229],"improvement":[231],"9.7%":[233],"20.4%,":[235],"respectively,":[236],"over":[237],"best":[239,264],"square":[240],"(cubic)":[241],"chosen":[253],"by":[254],"algorithm":[259],"similar":[261],"performing":[265],"obtained":[267],"through":[268],"extensive":[270],"search,":[271],"validating":[272],"underlying":[276],"algorithm.":[278]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":8},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":4},{"year":2015,"cited_by_count":2}],"updated_date":"2026-04-14T08:04:32.555800","created_date":"2025-10-10T00:00:00"}
