{"id":"https://openalex.org/W4415250746","doi":"https://doi.org/10.1109/hpec67600.2025.11196316","title":"Performance Evaluation of LAPACK Using SVE Optimized BLAS Kernels","display_name":"Performance Evaluation of LAPACK Using SVE Optimized BLAS Kernels","publication_year":2025,"publication_date":"2025-09-15","ids":{"openalex":"https://openalex.org/W4415250746","doi":"https://doi.org/10.1109/hpec67600.2025.11196316"},"language":"en","primary_location":{"id":"doi:10.1109/hpec67600.2025.11196316","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec67600.2025.11196316","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5116901275","display_name":"Aniket P. Garade","orcid":null},"institutions":[{"id":"https://openalex.org/I1331500379","display_name":"Centre for Development of Advanced Computing","ror":"https://ror.org/022abst40","country_code":"IN","type":"facility","lineage":["https://openalex.org/I1331500379","https://openalex.org/I4210121746"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Aniket P. Garade","raw_affiliation_strings":["Centre for Development of Advanced Computing,Bengaluru,India"],"affiliations":[{"raw_affiliation_string":"Centre for Development of Advanced Computing,Bengaluru,India","institution_ids":["https://openalex.org/I1331500379"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102899490","display_name":"Devendra Singh","orcid":"https://orcid.org/0000-0002-6332-2010"},"institutions":[{"id":"https://openalex.org/I1331500379","display_name":"Centre for Development of Advanced Computing","ror":"https://ror.org/022abst40","country_code":"IN","type":"facility","lineage":["https://openalex.org/I1331500379","https://openalex.org/I4210121746"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Sushil Pratap Singh","raw_affiliation_strings":["Centre for Development of Advanced Computing,Bengaluru,India"],"affiliations":[{"raw_affiliation_string":"Centre for Development of Advanced Computing,Bengaluru,India","institution_ids":["https://openalex.org/I1331500379"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068265109","display_name":"V. V. S. Prasanna Kumari Rayala","orcid":"https://orcid.org/0000-0002-7320-9329"},"institutions":[{"id":"https://openalex.org/I1331500379","display_name":"Centre for Development of Advanced Computing","ror":"https://ror.org/022abst40","country_code":"IN","type":"facility","lineage":["https://openalex.org/I1331500379","https://openalex.org/I4210121746"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Vishal Rayala","raw_affiliation_strings":["Centre for Development of Advanced Computing,Bengaluru,India"],"affiliations":[{"raw_affiliation_string":"Centre for Development of Advanced Computing,Bengaluru,India","institution_ids":["https://openalex.org/I1331500379"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083731232","display_name":"H V Deepika","orcid":null},"institutions":[{"id":"https://openalex.org/I1331500379","display_name":"Centre for Development of Advanced Computing","ror":"https://ror.org/022abst40","country_code":"IN","type":"facility","lineage":["https://openalex.org/I1331500379","https://openalex.org/I4210121746"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Deepika H V","raw_affiliation_strings":["Centre for Development of Advanced Computing,Bengaluru,India"],"affiliations":[{"raw_affiliation_string":"Centre for Development of Advanced Computing,Bengaluru,India","institution_ids":["https://openalex.org/I1331500379"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038622495","display_name":"P Haribabu","orcid":null},"institutions":[{"id":"https://openalex.org/I1331500379","display_name":"Centre for Development of Advanced Computing","ror":"https://ror.org/022abst40","country_code":"IN","type":"facility","lineage":["https://openalex.org/I1331500379","https://openalex.org/I4210121746"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Haribabu P","raw_affiliation_strings":["Centre for Development of Advanced Computing,Bengaluru,India"],"affiliations":[{"raw_affiliation_string":"Centre for Development of Advanced Computing,Bengaluru,India","institution_ids":["https://openalex.org/I1331500379"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101957775","display_name":"Sathish Kumar","orcid":"https://orcid.org/0000-0002-3162-2211"},"institutions":[{"id":"https://openalex.org/I1331500379","display_name":"Centre for Development of Advanced Computing","ror":"https://ror.org/022abst40","country_code":"IN","type":"facility","lineage":["https://openalex.org/I1331500379","https://openalex.org/I4210121746"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"S A Kumar","raw_affiliation_strings":["Centre for Development of Advanced Computing,Bengaluru,India"],"affiliations":[{"raw_affiliation_string":"Centre for Development of Advanced Computing,Bengaluru,India","institution_ids":["https://openalex.org/I1331500379"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102868385","display_name":"S D Sudarsan","orcid":"https://orcid.org/0000-0002-7990-6686"},"institutions":[{"id":"https://openalex.org/I1331500379","display_name":"Centre for Development of Advanced Computing","ror":"https://ror.org/022abst40","country_code":"IN","type":"facility","lineage":["https://openalex.org/I1331500379","https://openalex.org/I4210121746"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"S D Sudarsan","raw_affiliation_strings":["Centre for Development of Advanced Computing,Bengaluru,India"],"affiliations":[{"raw_affiliation_string":"Centre for Development of Advanced Computing,Bengaluru,India","institution_ids":["https://openalex.org/I1331500379"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5116901275"],"corresponding_institution_ids":["https://openalex.org/I1331500379"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.30643275,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9750000238418579,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9750000238418579,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.6585999727249146},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6420999765396118},{"id":"https://openalex.org/keywords/vectorization","display_name":"Vectorization (mathematics)","score":0.5957000255584717},{"id":"https://openalex.org/keywords/linear-algebra","display_name":"Linear algebra","score":0.5683000087738037},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.4756999909877777},{"id":"https://openalex.org/keywords/ranging","display_name":"Ranging","score":0.367900013923645},{"id":"https://openalex.org/keywords/flops","display_name":"FLOPS","score":0.3531000018119812},{"id":"https://openalex.org/keywords/double-precision-floating-point-format","display_name":"Double-precision floating-point format","score":0.33869999647140503},{"id":"https://openalex.org/keywords/linear-system","display_name":"Linear system","score":0.3228999972343445}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8385999798774719},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7444999814033508},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.6585999727249146},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6420999765396118},{"id":"https://openalex.org/C41681595","wikidata":"https://www.wikidata.org/wiki/Q7917855","display_name":"Vectorization (mathematics)","level":2,"score":0.5957000255584717},{"id":"https://openalex.org/C139352143","wikidata":"https://www.wikidata.org/wiki/Q82571","display_name":"Linear algebra","level":2,"score":0.5683000087738037},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.5582000017166138},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.4756999909877777},{"id":"https://openalex.org/C115051666","wikidata":"https://www.wikidata.org/wiki/Q6522493","display_name":"Ranging","level":2,"score":0.367900013923645},{"id":"https://openalex.org/C3826847","wikidata":"https://www.wikidata.org/wiki/Q188768","display_name":"FLOPS","level":2,"score":0.3531000018119812},{"id":"https://openalex.org/C35912277","wikidata":"https://www.wikidata.org/wiki/Q1243369","display_name":"Double-precision floating-point format","level":3,"score":0.33869999647140503},{"id":"https://openalex.org/C6802819","wikidata":"https://www.wikidata.org/wiki/Q1072174","display_name":"Linear system","level":2,"score":0.3228999972343445},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3199000060558319},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.3165000081062317},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.31299999356269836},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.2921000123023987},{"id":"https://openalex.org/C2778029271","wikidata":"https://www.wikidata.org/wiki/Q5421931","display_name":"Extension (predicate logic)","level":2,"score":0.2872999906539917},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.2865999937057495},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.274399995803833},{"id":"https://openalex.org/C161824985","wikidata":"https://www.wikidata.org/wiki/Q919509","display_name":"Vector processor","level":2,"score":0.27149999141693115},{"id":"https://openalex.org/C133095886","wikidata":"https://www.wikidata.org/wiki/Q1307173","display_name":"Single-precision floating-point format","level":3,"score":0.2712000012397766},{"id":"https://openalex.org/C2779982483","wikidata":"https://www.wikidata.org/wiki/Q6094420","display_name":"Iterative refinement","level":2,"score":0.27059999108314514},{"id":"https://openalex.org/C96972482","wikidata":"https://www.wikidata.org/wiki/Q1049168","display_name":"Xeon Phi","level":2,"score":0.2685000002384186},{"id":"https://openalex.org/C2989134064","wikidata":"https://www.wikidata.org/wiki/Q288510","display_name":"Execution time","level":2,"score":0.2669000029563904},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.26600000262260437},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.2624000012874603},{"id":"https://openalex.org/C2982832238","wikidata":"https://www.wikidata.org/wiki/Q5531640","display_name":"General purpose","level":2,"score":0.2603999972343445},{"id":"https://openalex.org/C2778400913","wikidata":"https://www.wikidata.org/wiki/Q1639024","display_name":"Mathematical software","level":3,"score":0.2596000134944916},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.2581000030040741},{"id":"https://openalex.org/C3020431745","wikidata":"https://www.wikidata.org/wiki/Q25325220","display_name":"Many core","level":2,"score":0.25589999556541443},{"id":"https://openalex.org/C76752949","wikidata":"https://www.wikidata.org/wiki/Q7607499","display_name":"Stencil","level":2,"score":0.2526000142097473},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.25119999051094055}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec67600.2025.11196316","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec67600.2025.11196316","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W2584202941","https://openalex.org/W2613264175","https://openalex.org/W2900234561","https://openalex.org/W2997728548","https://openalex.org/W3005380678","https://openalex.org/W3091837582","https://openalex.org/W4360995240","https://openalex.org/W4387010580","https://openalex.org/W4391972606","https://openalex.org/W4409132226"],"related_works":[],"abstract_inverted_index":{"Optimized":[0],"mathematical":[1],"libraries":[2],"tailored":[3],"to":[4,79,86],"hardware":[5],"architectures":[6],"are":[7],"critical":[8],"for":[9,64,117],"maximizing":[10],"performance":[11,44,74],"in":[12,88,104],"scientific":[13],"and":[14,30,50,84,93,109],"AI-driven":[15],"applications.":[16],"The":[17],"ARM":[18,38,124],"Scalable":[19],"Vector":[20],"Extension":[21],"(SVE)":[22],"introduces":[23],"architectural":[24],"features":[25],"such":[26],"as":[27,60],"vector-length-agnostic":[28],"execution":[29],"predicate-driven":[31],"control,":[32],"enabling":[33],"efficient":[34],"vectorization":[35],"on":[36,70,122],"modern":[37],"platforms.":[39,125],"This":[40],"paper":[41],"evaluates":[42],"the":[43,56,61,100,111],"impact":[45],"of":[46,102,113],"SVE-optimized":[47],"Level":[48,51],"1":[49],"2":[52],"BLAS":[53],"routines":[54],"within":[55],"OpenBLAS":[57],"library,":[58],"used":[59],"computational":[62],"backend":[63],"core":[65],"LAPACK":[66,82],"routines.":[67],"Experiments":[68],"conducted":[69],"A64FX-based":[71],"systems":[72],"demonstrate":[73],"gains":[75],"ranging":[76],"from":[77],"1.8\u00d7":[78],"3.6\u00d7":[80],"across":[81],"functions,":[83],"up":[85],"2.0\u00d7":[87],"real-world":[89],"applications":[90],"including":[91],"LAMMPS":[92],"scikit-learn\u2019s":[94],"Truncated":[95],"SVD.":[96],"These":[97],"results":[98],"validate":[99],"effectiveness":[101],"SVE":[103],"accelerating":[105],"linear":[106],"algebra":[107],"operations":[108],"underscore":[110],"importance":[112],"architecture-aware":[114],"kernel":[115],"optimizations":[116],"scalable,":[118],"portable":[119],"high-performance":[120],"computing":[121],"emerging":[123]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-16T00:00:00"}
