{"id":"https://openalex.org/W4308090450","doi":"https://doi.org/10.1109/hpec55821.2022.9926354","title":"Computing In-Place FFTs with SIMD Lane Slicing","display_name":"Computing In-Place FFTs with SIMD Lane Slicing","publication_year":2022,"publication_date":"2022-09-19","ids":{"openalex":"https://openalex.org/W4308090450","doi":"https://doi.org/10.1109/hpec55821.2022.9926354"},"language":"en","primary_location":{"id":"doi:10.1109/hpec55821.2022.9926354","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec55821.2022.9926354","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063862876","display_name":"Beno\u00eet Dupont de Dinechin","orcid":"https://orcid.org/0000-0002-3164-2765"},"institutions":[{"id":"https://openalex.org/I4210161189","display_name":"ProbaYes (France)","ror":"https://ror.org/05nb5x242","country_code":"FR","type":"company","lineage":["https://openalex.org/I4210161189"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Benoit Dupont De Dinechin","raw_affiliation_strings":["Kalray SA,Montbonnot Saint-Martin,France","Kalray SA, Montbonnot Saint-Martin, France"],"affiliations":[{"raw_affiliation_string":"Kalray SA,Montbonnot Saint-Martin,France","institution_ids":[]},{"raw_affiliation_string":"Kalray SA, Montbonnot Saint-Martin, France","institution_ids":["https://openalex.org/I4210161189"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5063862876"],"corresponding_institution_ids":["https://openalex.org/I4210161189"],"apc_list":null,"apc_paid":null,"fwci":0.1487,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.41083769,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11034","display_name":"Digital Filter Design and Implementation","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11034","display_name":"Digital Filter Design and Implementation","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.984499990940094,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11697","display_name":"Numerical Methods and Algorithms","score":0.9815000295639038,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.8924667835235596},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8209395408630371},{"id":"https://openalex.org/keywords/fast-fourier-transform","display_name":"Fast Fourier transform","score":0.7905082106590271},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7806463241577148},{"id":"https://openalex.org/keywords/very-long-instruction-word","display_name":"Very long instruction word","score":0.7159230709075928},{"id":"https://openalex.org/keywords/digital-signal-processing","display_name":"Digital signal processing","score":0.42855778336524963},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.24330604076385498},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.2427416741847992}],"concepts":[{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.8924667835235596},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8209395408630371},{"id":"https://openalex.org/C75172450","wikidata":"https://www.wikidata.org/wiki/Q623950","display_name":"Fast Fourier transform","level":2,"score":0.7905082106590271},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7806463241577148},{"id":"https://openalex.org/C170595534","wikidata":"https://www.wikidata.org/wiki/Q249743","display_name":"Very long instruction word","level":2,"score":0.7159230709075928},{"id":"https://openalex.org/C84462506","wikidata":"https://www.wikidata.org/wiki/Q173142","display_name":"Digital signal processing","level":2,"score":0.42855778336524963},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.24330604076385498},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.2427416741847992}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec55821.2022.9926354","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec55821.2022.9926354","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7","score":0.5199999809265137}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2115688358","https://openalex.org/W1503212777","https://openalex.org/W2072728786","https://openalex.org/W2291920536","https://openalex.org/W2162726111","https://openalex.org/W2022397046","https://openalex.org/W2146636354","https://openalex.org/W1536455114","https://openalex.org/W1603583590","https://openalex.org/W338671845"],"abstract_inverted_index":{"We":[0,85],"present":[1],"an":[2],"approach":[3,88],"for":[4,121],"implementing":[5],"in-place":[6,77],"FFTs":[7],"on":[8,41,89,132],"cores":[9],"fitted":[10],"with":[11,22,144],"SIMD":[12,23,31,42,59,68],"units":[13],"and":[14,48,61,117],"non-temporal":[15],"load-store":[16],"units.":[17],"Loading":[18],"the":[19,30,50,58,78,83,100,107,118,133],"input":[20],"samples":[21],"instructions":[24,64],"decimates":[25],"them":[26],"in":[27,124],"time":[28],"across":[29],"lanes.":[32],"A":[33,71],"classic":[34],"FFT":[35,73,95,126],"implementation":[36],"is":[37],"extended":[38],"to":[39,81],"operate":[40],"data":[43,47],"rather":[44],"than":[45],"scalar":[46],"computes":[49],"sub-transforms":[51,79],"concurrently.":[52],"This":[53],"enables":[54,114],"efficient":[55],"exploitation":[56],"of":[57,106],"arithmetic":[60],"memory":[62],"access":[63],"while":[65],"involving":[66],"little":[67],"lane":[69],"shuffling.":[70],"last":[72],"stage":[74],"then":[75],"recombines":[76],"results":[80],"produce":[82],"output.":[84],"illustrate":[86],"this":[87],"a":[90,139],"Cooley-":[91],"Tukey":[92],"radix-4":[93],"decimated-in-frequency":[94],"implementation,":[96],"which":[97,137],"also":[98],"integrates":[99],"two":[101],"inner":[102],"loop":[103],"collapsing":[104],"optimization":[105],"TI":[108],"C6x":[109],"DSP":[110],"_fft32\u00d732":[111],"code":[112],"that":[113],"software":[115],"pipelining":[116],"Burrus":[119],"technique":[120],"using":[122],"bit-reversal":[123],"high-radix":[125],"implementations.":[127],"Performance":[128],"evaluations":[129],"are":[130],"performed":[131],"Kalray":[134],"KV3":[135],"core,":[136],"implements":[138],"64-bit":[140],"vector-scalar":[141],"VLIW":[142],"architecture":[143],"level-l":[145],"cache":[146],"bypass":[147],"load":[148],"instructions.":[149]},"counts_by_year":[{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
