{"id":"https://openalex.org/W4308090480","doi":"https://doi.org/10.1109/hpec55821.2022.9926349","title":"Performance speedup of Quantum Espresso using optimized AOCL-FFTW","display_name":"Performance speedup of Quantum Espresso using optimized AOCL-FFTW","publication_year":2022,"publication_date":"2022-09-19","ids":{"openalex":"https://openalex.org/W4308090480","doi":"https://doi.org/10.1109/hpec55821.2022.9926349"},"language":"en","primary_location":{"id":"doi:10.1109/hpec55821.2022.9926349","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec55821.2022.9926349","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5085783331","display_name":"S. Biplab Raut","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"S. Biplab Raut","raw_affiliation_strings":["AMD India Private Limited,Bangalore,India","AMD India Private Limited, Bangalore, India"],"affiliations":[{"raw_affiliation_string":"AMD India Private Limited,Bangalore,India","institution_ids":[]},{"raw_affiliation_string":"AMD India Private Limited, Bangalore, India","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5085783331"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0915,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.41114315,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"4"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10558","display_name":"Advancements in Semiconductor Devices and Circuit Design","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10558","display_name":"Advancements in Semiconductor Devices and Circuit Design","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10363","display_name":"Low-power high-performance VLSI design","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9876999855041504,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7306554317474365},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.7301983833312988},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.7061488628387451},{"id":"https://openalex.org/keywords/fast-fourier-transform","display_name":"Fast Fourier transform","score":0.5697911977767944},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.49650365114212036},{"id":"https://openalex.org/keywords/node","display_name":"Node (physics)","score":0.4784809947013855},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.42479854822158813},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4247295558452606},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.3725707530975342},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.21733364462852478},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.1234380304813385},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.08824360370635986},{"id":"https://openalex.org/keywords/quantum-mechanics","display_name":"Quantum mechanics","score":0.06189793348312378}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7306554317474365},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.7301983833312988},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.7061488628387451},{"id":"https://openalex.org/C75172450","wikidata":"https://www.wikidata.org/wiki/Q623950","display_name":"Fast Fourier transform","level":2,"score":0.5697911977767944},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.49650365114212036},{"id":"https://openalex.org/C62611344","wikidata":"https://www.wikidata.org/wiki/Q1062658","display_name":"Node (physics)","level":2,"score":0.4784809947013855},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.42479854822158813},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4247295558452606},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.3725707530975342},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.21733364462852478},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.1234380304813385},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.08824360370635986},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.06189793348312378}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec55821.2022.9926349","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec55821.2022.9926349","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2317245370","https://openalex.org/W4249323025","https://openalex.org/W198851386","https://openalex.org/W2030310580","https://openalex.org/W947442053","https://openalex.org/W1980160788","https://openalex.org/W2148915962","https://openalex.org/W2283866686","https://openalex.org/W4287182096","https://openalex.org/W27867058"],"abstract_inverted_index":{"Quantum":[0],"Espresso":[1],"(QE)":[2],"is":[3,62],"an":[4,23],"open-source":[5],"software":[6],"suite":[7],"for":[8,28,51,67],"electronic-structure":[9],"calculations":[10],"and":[11,35,45,97,143],"materials":[12],"modeling":[13],"at":[14],"the":[15,36,56,63,76,86,105,124,128,151],"nanoscale.":[16],"QE":[17,42,68,73,109,152],"depends":[18],"upon":[19],"multiple":[20],"libraries":[21],"including":[22],"internal":[24],"or":[25],"external":[26],"library":[27,65,78],"FFT":[29,64],"computations.":[30],"The":[31],"iterative":[32],"diagonalization":[33],"process":[34],"computation":[37],"of":[38,55,95,108,113,127],"charge":[39],"density":[40],"in":[41,79,101,110,120,131,135,141],"use":[43,112],"forward":[44],"inverse":[46],"3D":[47],"FFTs":[48],"that":[49,147],"account":[50],"a":[52,80,92],"large":[53],"portion":[54],"total":[57],"application":[58],"runtime.":[59],"AOCL-":[60],"FFTW":[61,77,114],"recommended":[66],"on":[69],"AMD":[70],"CPU":[71],"systems.":[72],"currently":[74],"uses":[75],"sub-optimal":[81],"manner":[82],"thereby":[83],"not":[84],"achieving":[85],"best":[87],"performance.":[88],"This":[89],"paper":[90],"presents":[91],"new":[93],"set":[94],"design":[96],"implementation":[98],"strategies":[99],"applied":[100],"AOCL-FFTW":[102,132],"to":[103,149],"overcome":[104],"major":[106],"limitations":[107],"its":[111],"without":[115],"requiring":[116],"any":[117],"code":[118],"changes":[119],"QE.":[121],"Results":[122],"showcasing":[123],"performance":[125],"benefits":[126],"proposed":[129],"optimizations":[130],"are":[133,139],"presented":[134],"this":[136],"paper.":[137],"Speed-ups":[138],"achieved":[140],"single-node":[142],"multi-node":[144],"test":[145],"executions":[146],"help":[148],"accelerate":[150],"application.":[153]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
