{"id":"https://openalex.org/W1865141138","doi":"https://doi.org/10.1007/978-3-540-69384-0_98","title":"Fast and Small Short Vector SIMD Matrix Multiplication Kernels for the Synergistic Processing Element of the CELL Processor","display_name":"Fast and Small Short Vector SIMD Matrix Multiplication Kernels for the Synergistic Processing Element of the CELL Processor","publication_year":2008,"publication_date":"2008-01-01","ids":{"openalex":"https://openalex.org/W1865141138","doi":"https://doi.org/10.1007/978-3-540-69384-0_98","mag":"1865141138"},"language":"en","primary_location":{"id":"doi:10.1007/978-3-540-69384-0_98","is_oa":true,"landing_page_url":"https://doi.org/10.1007/978-3-540-69384-0_98","pdf_url":"https://link.springer.com/content/pdf/10.1007%2F978-3-540-69384-0_98.pdf","source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007%2F978-3-540-69384-0_98.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5057537240","display_name":"Wesley Alvaro","orcid":null},"institutions":[{"id":"https://openalex.org/I75027704","display_name":"University of Tennessee at Knoxville","ror":"https://ror.org/020f3ap87","country_code":"US","type":"education","lineage":["https://openalex.org/I75027704"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Wesley Alvaro","raw_affiliation_strings":["University of Tennessee, Knoxville, USA"],"affiliations":[{"raw_affiliation_string":"University of Tennessee, Knoxville, USA","institution_ids":["https://openalex.org/I75027704"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070456582","display_name":"Jakub Kurzak","orcid":"https://orcid.org/0000-0002-9697-0145"},"institutions":[{"id":"https://openalex.org/I75027704","display_name":"University of Tennessee at Knoxville","ror":"https://ror.org/020f3ap87","country_code":"US","type":"education","lineage":["https://openalex.org/I75027704"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jakub Kurzak","raw_affiliation_strings":["University of Tennessee, Knoxville, USA"],"affiliations":[{"raw_affiliation_string":"University of Tennessee, Knoxville, USA","institution_ids":["https://openalex.org/I75027704"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5075517045","display_name":"Jack Dongarra","orcid":"https://orcid.org/0000-0003-3247-1782"},"institutions":[{"id":"https://openalex.org/I75027704","display_name":"University of Tennessee at Knoxville","ror":"https://ror.org/020f3ap87","country_code":"US","type":"education","lineage":["https://openalex.org/I75027704"]},{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]},{"id":"https://openalex.org/I28407311","display_name":"University of Manchester","ror":"https://ror.org/027m9bs27","country_code":"GB","type":"education","lineage":["https://openalex.org/I28407311"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Jack Dongarra","raw_affiliation_strings":["Oak Ridge National Laboratory, \u00a0, Oak Ridge, USA","University of Manchester, Manchester, UK","University of Tennessee, Knoxville, USA"],"affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory, \u00a0, Oak Ridge, USA","institution_ids":["https://openalex.org/I1289243028"]},{"raw_affiliation_string":"University of Manchester, Manchester, UK","institution_ids":["https://openalex.org/I28407311"]},{"raw_affiliation_string":"University of Tennessee, Knoxville, USA","institution_ids":["https://openalex.org/I75027704"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5057537240"],"corresponding_institution_ids":["https://openalex.org/I75027704"],"apc_list":{"value":5000,"currency":"EUR","value_usd":5392},"apc_paid":{"value":5000,"currency":"EUR","value_usd":5392},"fwci":3.1645,"has_fulltext":true,"cited_by_count":11,"citation_normalized_percentile":{"value":0.9159292,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"935","last_page":"944"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9934999942779541,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.8403643369674683},{"id":"https://openalex.org/keywords/multiplication","display_name":"Multiplication (music)","score":0.7971910238265991},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6918697357177734},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.641076385974884},{"id":"https://openalex.org/keywords/floating-point","display_name":"Floating point","score":0.6095216274261475},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.6039505004882812},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.5971248149871826},{"id":"https://openalex.org/keywords/linear-algebra","display_name":"Linear algebra","score":0.5853305459022522},{"id":"https://openalex.org/keywords/eigenvalues-and-eigenvectors","display_name":"Eigenvalues and eigenvectors","score":0.5615161061286926},{"id":"https://openalex.org/keywords/matrix","display_name":"Matrix (chemical analysis)","score":0.5177887678146362},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5023658275604248},{"id":"https://openalex.org/keywords/single-precision-floating-point-format","display_name":"Single-precision floating-point format","score":0.44387349486351013},{"id":"https://openalex.org/keywords/numerical-linear-algebra","display_name":"Numerical linear algebra","score":0.43992194533348083},{"id":"https://openalex.org/keywords/double-precision-floating-point-format","display_name":"Double-precision floating-point format","score":0.43416351079940796},{"id":"https://openalex.org/keywords/square-matrix","display_name":"Square matrix","score":0.41112035512924194},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.3797440230846405},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3218024969100952},{"id":"https://openalex.org/keywords/linear-system","display_name":"Linear system","score":0.29202258586883545},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.18721115589141846},{"id":"https://openalex.org/keywords/symmetric-matrix","display_name":"Symmetric matrix","score":0.16032981872558594},{"id":"https://openalex.org/keywords/discrete-mathematics","display_name":"Discrete mathematics","score":0.12129586935043335}],"concepts":[{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.8403643369674683},{"id":"https://openalex.org/C2780595030","wikidata":"https://www.wikidata.org/wiki/Q3860309","display_name":"Multiplication (music)","level":2,"score":0.7971910238265991},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6918697357177734},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.641076385974884},{"id":"https://openalex.org/C84211073","wikidata":"https://www.wikidata.org/wiki/Q117879","display_name":"Floating point","level":2,"score":0.6095216274261475},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.6039505004882812},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.5971248149871826},{"id":"https://openalex.org/C139352143","wikidata":"https://www.wikidata.org/wiki/Q82571","display_name":"Linear algebra","level":2,"score":0.5853305459022522},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.5615161061286926},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.5177887678146362},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5023658275604248},{"id":"https://openalex.org/C133095886","wikidata":"https://www.wikidata.org/wiki/Q1307173","display_name":"Single-precision floating-point format","level":3,"score":0.44387349486351013},{"id":"https://openalex.org/C163834973","wikidata":"https://www.wikidata.org/wiki/Q2004891","display_name":"Numerical linear algebra","level":3,"score":0.43992194533348083},{"id":"https://openalex.org/C35912277","wikidata":"https://www.wikidata.org/wiki/Q1243369","display_name":"Double-precision floating-point format","level":3,"score":0.43416351079940796},{"id":"https://openalex.org/C69044650","wikidata":"https://www.wikidata.org/wiki/Q2739329","display_name":"Square matrix","level":4,"score":0.41112035512924194},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.3797440230846405},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3218024969100952},{"id":"https://openalex.org/C6802819","wikidata":"https://www.wikidata.org/wiki/Q1072174","display_name":"Linear system","level":2,"score":0.29202258586883545},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.18721115589141846},{"id":"https://openalex.org/C54848796","wikidata":"https://www.wikidata.org/wiki/Q339011","display_name":"Symmetric matrix","level":3,"score":0.16032981872558594},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.12129586935043335},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C84114770","wikidata":"https://www.wikidata.org/wiki/Q46344","display_name":"Quantum","level":2,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1007/978-3-540-69384-0_98","is_oa":true,"landing_page_url":"https://doi.org/10.1007/978-3-540-69384-0_98","pdf_url":"https://link.springer.com/content/pdf/10.1007%2F978-3-540-69384-0_98.pdf","source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"},{"id":"pmh:oai:pure.atira.dk:openaire_cris_publications/890cc171-ed3f-4fd2-9f54-1c4ecfc84309","is_oa":false,"landing_page_url":"https://research.manchester.ac.uk/en/publications/890cc171-ed3f-4fd2-9f54-1c4ecfc84309","pdf_url":null,"source":{"id":"https://openalex.org/S4306400662","display_name":"Research Explorer (The University of Manchester)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I28407311","host_organization_name":"University of Manchester","host_organization_lineage":["https://openalex.org/I28407311"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Alvaro, W, Kurzak, J & Dongarra, J 2008, Fast and small short vector SIMD matrix multiplication kernels for the synergistic processing element of the CELL processor. in Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)|Lect. Notes Comput. Sci.. vol. 5101, Lecture Notes in Computer Science, Springer Nature, pp. 935-944, 8th International Conference on Computational Science, ICCS 2008, Krakow, 1/07/08. https://doi.org/10.1007/978-3-540-69384-0_98","raw_type":"info:eu-repo/semantics/conferenceObject"},{"id":"pmh:oai:pure.atira.dk:publications/890cc171-ed3f-4fd2-9f54-1c4ecfc84309","is_oa":false,"landing_page_url":"https://www.research.manchester.ac.uk/portal/en/publications/fast-and-small-short-vector-simd-matrix-multiplication-kernels-for-the-synergistic-processing-element-of-the-cell-processor(890cc171-ed3f-4fd2-9f54-1c4ecfc84309).html","pdf_url":null,"source":{"id":"https://openalex.org/S4306400662","display_name":"Research Explorer (The University of Manchester)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I28407311","host_organization_name":"University of Manchester","host_organization_lineage":["https://openalex.org/I28407311"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Alvaro, W, Kurzak, J & Dongarra, J 2008, Fast and small short vector SIMD matrix multiplication kernels for the synergistic processing element of the CELL processor. in Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)|Lect. Notes Comput. Sci.. vol. 5101, Lecture Notes in Computer Science, Springer Nature, pp. 935-944, 8th International Conference on Computational Science, ICCS 2008, Krakow, 1/07/08. https://doi.org/10.1007/978-3-540-69384-0_98","raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":{"id":"doi:10.1007/978-3-540-69384-0_98","is_oa":true,"landing_page_url":"https://doi.org/10.1007/978-3-540-69384-0_98","pdf_url":"https://link.springer.com/content/pdf/10.1007%2F978-3-540-69384-0_98.pdf","source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W1865141138.pdf","grobid_xml":"https://content.openalex.org/works/W1865141138.grobid-xml"},"referenced_works_count":24,"referenced_works":["https://openalex.org/W94439627","https://openalex.org/W1480928214","https://openalex.org/W1492601037","https://openalex.org/W1552224009","https://openalex.org/W1760551737","https://openalex.org/W1990262300","https://openalex.org/W2016358047","https://openalex.org/W2023126431","https://openalex.org/W2051598123","https://openalex.org/W2079658918","https://openalex.org/W2088943630","https://openalex.org/W2103817093","https://openalex.org/W2109222446","https://openalex.org/W2114067856","https://openalex.org/W2116431830","https://openalex.org/W2121082877","https://openalex.org/W2121380786","https://openalex.org/W2124661136","https://openalex.org/W2125263803","https://openalex.org/W2153092996","https://openalex.org/W2162991651","https://openalex.org/W2168689950","https://openalex.org/W2169631286","https://openalex.org/W4301491118"],"related_works":["https://openalex.org/W3150370983","https://openalex.org/W2239119680","https://openalex.org/W3022016791","https://openalex.org/W1638830944","https://openalex.org/W2963207152","https://openalex.org/W1564887326","https://openalex.org/W4226248541","https://openalex.org/W2030263612","https://openalex.org/W2752588380","https://openalex.org/W2375123777"],"abstract_inverted_index":{"Matrix":[0],"multiplication":[1,87,97,124],"is":[2,89,94,163],"one":[3],"of":[4,14,23,29,32,49,57,71,79,84,109,114,149,160,168,177],"the":[5,12,21,47,69,72,85,95,101,110,115,129,139,155,158,169],"most":[6],"common":[7],"numerical":[8,80],"operations,":[9],"especially":[10],"in":[11,55],"area":[13],"dense":[15],"linear":[16,30],"algebra,":[17],"where":[18],"it":[19],"forms":[20],"core":[22],"many":[24],"important":[25],"algorithms,":[26,81],"including":[27],"solvers":[28],"systems":[31],"equations,":[33],"least":[34],"square":[35],"problems,":[36],"and":[37,39,138,181],"singular":[38],"eigenvalue":[40],"computations.":[41],"The":[42,91],"STI":[43],"CELL":[44,73,116],"processor":[45,52,74],"exceeds":[46],"capabilities":[48],"any":[50],"other":[51],"available":[53],"today":[54],"terms":[56],"peak":[58],"single":[59,121],"precision,":[60],"floating":[61],"point":[62],"performance.":[63],"In":[64,118],"order":[65],"to":[66],"fully":[67],"exploit":[68],"potential":[70],"for":[75,100,147,179],"a":[76],"wide":[77],"range":[78],"fast":[82],"implementation":[83],"matrix":[86,96,123],"operation":[88,137,146],"essential.":[90],"crutial":[92],"component":[93],"kernel":[98],"crafted":[99],"short":[102],"vector":[103],"Single":[104],"Instruction":[105],"Multiple":[106],"Data":[107],"architecture":[108],"Synergistic":[111],"Processing":[112],"Element":[113],"processor.":[117],"this":[119],"paper,":[120],"precision":[122],"kernels":[125],"are":[126],"presented":[127],"implementing":[128],"C":[130,132,140,142],"=":[131,141],"\u2212":[133,143],"A":[134,144],"\u00d7B":[135,145],"T":[136],"matrices":[148],"size":[150],"64":[151],"\u00d764":[152],"elements.":[153],"For":[154],"latter":[156],"case,":[157],"performance":[159],"25.55":[161],"Gflop/s":[162],"reported,":[164],"or":[165],"99.80":[166],"percent":[167],"peak,":[170],"using":[171],"as":[172,174],"little":[173],"5.9":[175],"KB":[176],"storage":[178],"code":[180],"auxiliary":[182],"data":[183],"structures.":[184]},"counts_by_year":[{"year":2012,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
