{"id":"https://openalex.org/W2018641575","doi":"https://doi.org/10.1145/1995896.1995938","title":"Automatic SIMD vectorization of fast fourier transforms for the larrabee and AVX instruction sets","display_name":"Automatic SIMD vectorization of fast fourier transforms for the larrabee and AVX instruction sets","publication_year":2011,"publication_date":"2011-05-31","ids":{"openalex":"https://openalex.org/W2018641575","doi":"https://doi.org/10.1145/1995896.1995938","mag":"2018641575"},"language":"en","primary_location":{"id":"doi:10.1145/1995896.1995938","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1995896.1995938","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the international conference on Supercomputing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5017329379","display_name":"Daniel S. McFarlin","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Daniel S. McFarlin","raw_affiliation_strings":["Carnegie Mellon University, Pittsburgh, PA, USA","Carnegie-Mellon University, Pittsburgh, Pa., USA#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Carnegie-Mellon University, Pittsburgh, Pa., USA#TAB#","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022230226","display_name":"Volodymyr Arbatov","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Volodymyr Arbatov","raw_affiliation_strings":["Carnegie Mellon University, Pittsburgh, PA, USA","Carnegie-Mellon University, Pittsburgh, Pa., USA#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Carnegie-Mellon University, Pittsburgh, Pa., USA#TAB#","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062806943","display_name":"Franz Franchetti","orcid":"https://orcid.org/0000-0002-3529-8973"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Franz Franchetti","raw_affiliation_strings":["Carnegie Mellon University, Pittsburgh, PA, USA","Carnegie-Mellon University, Pittsburgh, Pa., USA#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Carnegie-Mellon University, Pittsburgh, Pa., USA#TAB#","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5076407181","display_name":"Markus P\u00fcschel","orcid":"https://orcid.org/0000-0001-8834-8551"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Markus P\u00fcschel","raw_affiliation_strings":["ETH Zurich, Zurich, Switzerland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ETH Zurich, Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.3187,"has_fulltext":false,"cited_by_count":41,"citation_normalized_percentile":{"value":0.88751841,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"265","last_page":"274"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11697","display_name":"Numerical Methods and Algorithms","score":0.9944000244140625,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8453413248062134},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.8179858922958374},{"id":"https://openalex.org/keywords/vectorization","display_name":"Vectorization (mathematics)","score":0.7842022180557251},{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.7737428545951843},{"id":"https://openalex.org/keywords/instruction-set","display_name":"Instruction set","score":0.5442318320274353},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.5397487878799438},{"id":"https://openalex.org/keywords/fast-fourier-transform","display_name":"Fast Fourier transform","score":0.4534280002117157},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.22346875071525574},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.15358611941337585}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8453413248062134},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.8179858922958374},{"id":"https://openalex.org/C41681595","wikidata":"https://www.wikidata.org/wiki/Q7917855","display_name":"Vectorization (mathematics)","level":2,"score":0.7842022180557251},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.7737428545951843},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.5442318320274353},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.5397487878799438},{"id":"https://openalex.org/C75172450","wikidata":"https://www.wikidata.org/wiki/Q623950","display_name":"Fast Fourier transform","level":2,"score":0.4534280002117157},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.22346875071525574},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.15358611941337585}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/1995896.1995938","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1995896.1995938","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the international conference on Supercomputing","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.307.4934","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.307.4934","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://users.ece.cmu.edu/~franzf/papers/ics2011.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320307102","display_name":"Intel Corporation","ror":"https://ror.org/01ek73717"},{"id":"https://openalex.org/F4320314637","display_name":"National Physical Science Consortium","ror":null},{"id":"https://openalex.org/F4320333566","display_name":"National Defense Science and Engineering Graduate","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W1494930385","https://openalex.org/W1528855243","https://openalex.org/W1543833969","https://openalex.org/W1547830536","https://openalex.org/W1555087507","https://openalex.org/W1595548352","https://openalex.org/W1965638741","https://openalex.org/W1977877565","https://openalex.org/W1984625316","https://openalex.org/W2037929850","https://openalex.org/W2045810654","https://openalex.org/W2064508949","https://openalex.org/W2090268225","https://openalex.org/W2096070062","https://openalex.org/W2099404643","https://openalex.org/W2102182691","https://openalex.org/W2111394443","https://openalex.org/W2114012357","https://openalex.org/W2127324789","https://openalex.org/W2134572726","https://openalex.org/W2136952590","https://openalex.org/W2139827696","https://openalex.org/W2147146929","https://openalex.org/W2147423491","https://openalex.org/W2149236155","https://openalex.org/W2160145830","https://openalex.org/W2164197394","https://openalex.org/W2167639788","https://openalex.org/W2169150396","https://openalex.org/W2498043873","https://openalex.org/W3024487921","https://openalex.org/W4240054498","https://openalex.org/W4245302940","https://openalex.org/W4245987756","https://openalex.org/W4285719527","https://openalex.org/W6659810996"],"related_works":["https://openalex.org/W2566637483","https://openalex.org/W2127324789","https://openalex.org/W3024308452","https://openalex.org/W4244894488","https://openalex.org/W4285390450","https://openalex.org/W2979513934","https://openalex.org/W2111180768","https://openalex.org/W2366442643","https://openalex.org/W2090268225","https://openalex.org/W2351155539"],"abstract_inverted_index":{"The":[0,197],"well-known":[1],"shift":[2],"to":[3,34,78],"parallelism":[4,20],"in":[5,21,137],"CPUs":[6],"is":[7,15,64,75,195],"often":[8,67],"associated":[9],"with":[10,93],"multicores.":[11],"However":[12],"another":[13],"trend":[14],"equally":[16],"salient:":[17],"the":[18,44,68,85,94,114,147,151,155,166,185,200,207],"increasing":[19],"per-core":[22],"single-instruction":[23],"multiple-date":[24],"(SIMD)":[25],"vector":[26,62,95,115,217],"units.":[27],"Intel's":[28],"SSE":[29],"and":[30,50,55,59,66,118,131,179,203,215],"IBM's":[31],"VMX":[32],"(compatible":[33],"AltiVec)":[35],"both":[36],"offer":[37,53],"4-way":[38],"(single":[39],"precision)":[40],"floating":[41],"point,":[42],"but":[43],"recent":[45],"Intel":[46],"instruction":[47,116,187],"sets":[48],"AVX":[49,178],"Larrabee":[51],"(LRB)":[52],"8-way":[54,177],"16-way,":[56],"respectively.":[57],"Compilation":[58],"optimization":[60],"for":[61,159,176,181,213],"extensions":[63],"hard,":[65],"achievable":[69],"speed-up":[70,173],"by":[71,149,154],"using":[72,80],"vectorizing":[73],"compilers":[74],"small":[76,129],"compared":[77],"hand-optimization":[79,98],"intrinsic":[81],"function":[82],"interfaces.":[83],"Unfortunately,":[84],"complexity":[86],"of":[87,122,140,174,199,209],"these":[88],"intrinsics":[89],"interfaces":[90],"increases":[91],"considerably":[92],"length,":[96],"making":[97],"a":[99,106,120,138,171],"nightmare.":[100],"In":[101],"this":[102],"paper,":[103],"we":[104,169],"present":[105],"peephole-based":[107],"vectorization":[108,172],"system":[109,148,202],"that":[110,134],"takes":[111],"as":[112,128],"input":[113],"semantics":[117],"outputs":[119],"library":[121],"basic":[123],"data":[124],"reorganization":[125],"blocks":[126,152],"such":[127],"transpositions":[130],"perfect":[132],"shuffles":[133],"are":[135,189],"needed":[136,153],"variety":[139],"high":[141,210],"performance":[142,211],"computing":[143],"applications.":[144],"We":[145],"evaluate":[146],"generating":[150],"program":[156],"generator":[157],"Spiral":[158,204],"vectorized":[160],"fast":[161],"Fourier":[162],"transforms":[163],"(FFTs).":[164],"With":[165],"generated":[167],"FFTs":[168,212],"achieve":[170],"5.5--6.5":[175],"10--12.5":[180],"16-way":[182],"LRB.":[183],"For":[184],"latter":[186],"counts":[188],"used":[190],"since":[191],"no":[192],"timing":[193],"information":[194],"available.":[196],"combination":[198],"proposed":[201],"thus":[205],"automates":[206],"production":[208],"current":[214],"future":[216],"architectures.":[218]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":7},{"year":2017,"cited_by_count":4},{"year":2016,"cited_by_count":3},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":2},{"year":2013,"cited_by_count":4},{"year":2012,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
