{"id":"https://openalex.org/W2176391467","doi":"https://doi.org/10.1145/2381056.2381073","title":"Optimizing matrix transposes using a POWER7 cache model and explicit prefetching","display_name":"Optimizing matrix transposes using a POWER7 cache model and explicit prefetching","publication_year":2012,"publication_date":"2012-10-08","ids":{"openalex":"https://openalex.org/W2176391467","doi":"https://doi.org/10.1145/2381056.2381073","mag":"2176391467"},"language":"en","primary_location":{"id":"doi:10.1145/2381056.2381073","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2381056.2381073","pdf_url":null,"source":{"id":"https://openalex.org/S4210187660","display_name":"ACM SIGMETRICS Performance Evaluation Review","issn_l":"0163-5999","issn":["0163-5999","1557-9484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM SIGMETRICS Performance Evaluation Review","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5077053823","display_name":"Gabriel Mateescu","orcid":null},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH"],"is_corresponding":true,"raw_author_name":"Gabriel Mateescu","raw_affiliation_strings":["Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne, Lausanne, Switzerland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne, Lausanne, Switzerland","institution_ids":["https://openalex.org/I5124864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074942058","display_name":"Gregory H. Bauer","orcid":"https://orcid.org/0000-0001-7247-0915"},"institutions":[{"id":"https://openalex.org/I4210135837","display_name":"National Center for Supercomputing Applications","ror":"https://ror.org/03r10zj06","country_code":"US","type":"facility","lineage":["https://openalex.org/I157725225","https://openalex.org/I4210135837"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gregory H. Bauer","raw_affiliation_strings":["National Center for Supercomputing Applications, Urbana, IL, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Center for Supercomputing Applications, Urbana, IL, USA","institution_ids":["https://openalex.org/I4210135837"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5111782390","display_name":"Robert Fiedler","orcid":null},"institutions":[{"id":"https://openalex.org/I4210135837","display_name":"National Center for Supercomputing Applications","ror":"https://ror.org/03r10zj06","country_code":"US","type":"facility","lineage":["https://openalex.org/I157725225","https://openalex.org/I4210135837"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Robert A. Fiedler","raw_affiliation_strings":["National Center for Supercomputing Applications, Urbana, IL, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Center for Supercomputing Applications, Urbana, IL, USA","institution_ids":["https://openalex.org/I4210135837"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5077053823"],"corresponding_institution_ids":["https://openalex.org/I5124864"],"apc_list":null,"apc_paid":null,"fwci":0.5858,"has_fulltext":false,"cited_by_count":13,"citation_normalized_percentile":{"value":0.70220655,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"40","issue":"2","first_page":"68","last_page":"73"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.8780456781387329},{"id":"https://openalex.org/keywords/transpose","display_name":"Transpose","score":0.8286759257316589},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8145164847373962},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.7189747095108032},{"id":"https://openalex.org/keywords/cache-oblivious-algorithm","display_name":"Cache-oblivious algorithm","score":0.5883868336677551},{"id":"https://openalex.org/keywords/cache-pollution","display_name":"Cache pollution","score":0.5218697786331177},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.5153595805168152},{"id":"https://openalex.org/keywords/cpu-cache","display_name":"CPU cache","score":0.5019207000732422},{"id":"https://openalex.org/keywords/subroutine","display_name":"Subroutine","score":0.4744889736175537},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4743082523345947},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.4585355222225189},{"id":"https://openalex.org/keywords/cache-algorithms","display_name":"Cache algorithms","score":0.4426252245903015},{"id":"https://openalex.org/keywords/concurrency","display_name":"Concurrency","score":0.43687236309051514},{"id":"https://openalex.org/keywords/cache-coloring","display_name":"Cache coloring","score":0.41093194484710693},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.22028952836990356},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.17245852947235107}],"concepts":[{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.8780456781387329},{"id":"https://openalex.org/C200106649","wikidata":"https://www.wikidata.org/wiki/Q223683","display_name":"Transpose","level":3,"score":0.8286759257316589},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8145164847373962},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.7189747095108032},{"id":"https://openalex.org/C59687516","wikidata":"https://www.wikidata.org/wiki/Q5015938","display_name":"Cache-oblivious algorithm","level":5,"score":0.5883868336677551},{"id":"https://openalex.org/C113166858","wikidata":"https://www.wikidata.org/wiki/Q5015981","display_name":"Cache pollution","level":5,"score":0.5218697786331177},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.5153595805168152},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.5019207000732422},{"id":"https://openalex.org/C96147967","wikidata":"https://www.wikidata.org/wiki/Q190686","display_name":"Subroutine","level":2,"score":0.4744889736175537},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4743082523345947},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.4585355222225189},{"id":"https://openalex.org/C38556500","wikidata":"https://www.wikidata.org/wiki/Q13404475","display_name":"Cache algorithms","level":4,"score":0.4426252245903015},{"id":"https://openalex.org/C193702766","wikidata":"https://www.wikidata.org/wiki/Q1414548","display_name":"Concurrency","level":2,"score":0.43687236309051514},{"id":"https://openalex.org/C201148951","wikidata":"https://www.wikidata.org/wiki/Q5015976","display_name":"Cache coloring","level":4,"score":0.41093194484710693},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.22028952836990356},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.17245852947235107},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2381056.2381073","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2381056.2381073","pdf_url":null,"source":{"id":"https://openalex.org/S4210187660","display_name":"ACM SIGMETRICS Performance Evaluation Review","issn_l":"0163-5999","issn":["0163-5999","1557-9484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM SIGMETRICS Performance Evaluation Review","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W1492601037","https://openalex.org/W1912350778","https://openalex.org/W1980620372","https://openalex.org/W2017345827","https://openalex.org/W2043340768","https://openalex.org/W2323110450","https://openalex.org/W2725179571","https://openalex.org/W6605898172"],"related_works":["https://openalex.org/W2098406302","https://openalex.org/W2121191383","https://openalex.org/W2538519144","https://openalex.org/W2546991807","https://openalex.org/W1584415117","https://openalex.org/W1505654810","https://openalex.org/W2076114130","https://openalex.org/W4252570104","https://openalex.org/W2734782074","https://openalex.org/W2109265242"],"abstract_inverted_index":{"We":[0,13,28],"consider":[1],"the":[2,10,30,39,43,47,68,72,86,99,102],"problem":[3],"of":[4,46,54,67,71,85],"efficiently":[5],"computing":[6],"matrix":[7,16,49,56],"transposes":[8],"on":[9],"POWER7":[11,31],"architecture.":[12],"develop":[14],"a":[15,95],"transpose":[17,50,57],"algorithm":[18,58],"that":[19,66,84],"uses":[20],"cache":[21,23,33],"blocking,":[22],"prefetching":[24],"and":[25,34,37,74,78,101],"data":[26,32],"alignment.":[27],"model":[29,40],"memory":[35,44,104],"concurrency":[36],"use":[38],"to":[41,61],"predict":[42],"throughput":[45],"proposed":[48],"algorithm.":[51],"The":[52],"performance":[53],"our":[55],"is":[59,79],"up":[60],"five":[62],"times":[63,81],"higher":[64,82],"than":[65,83],"dgetmo":[69],"routine":[70],"Engineering":[73],"Scientific":[75],"Subroutine":[76],"Library":[77],"2.5":[80],"code":[87],"generated":[88],"by":[89],"compiler-inserted":[90],"prefetching.":[91],"Numerical":[92],"experiments":[93],"indicate":[94],"good":[96],"agreement":[97],"between":[98],"predicted":[100],"measured":[103],"throughput.":[105]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":1},{"year":2013,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
