{"id":"https://openalex.org/W2123616149","doi":"https://doi.org/10.1145/1375527.1375564","title":"Automatic SIMD vectorization of chains of recurrences","display_name":"Automatic SIMD vectorization of chains of recurrences","publication_year":2008,"publication_date":"2008-06-07","ids":{"openalex":"https://openalex.org/W2123616149","doi":"https://doi.org/10.1145/1375527.1375564","mag":"2123616149"},"language":"en","primary_location":{"id":"doi:10.1145/1375527.1375564","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1375527.1375564","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 22nd annual international conference on Supercomputing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074125036","display_name":"Yixin Shou","orcid":null},"institutions":[{"id":"https://openalex.org/I103163165","display_name":"Florida State University","ror":"https://ror.org/05g3dte14","country_code":"US","type":"education","lineage":["https://openalex.org/I103163165"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yixin Shou","raw_affiliation_strings":["Florida State University, Tallahassee, FL, USA","Florida State University , Tallahassee , FL , USA"],"affiliations":[{"raw_affiliation_string":"Florida State University, Tallahassee, FL, USA","institution_ids":["https://openalex.org/I103163165"]},{"raw_affiliation_string":"Florida State University , Tallahassee , FL , USA","institution_ids":["https://openalex.org/I103163165"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5109048740","display_name":"Robert A. van Engelen","orcid":null},"institutions":[{"id":"https://openalex.org/I103163165","display_name":"Florida State University","ror":"https://ror.org/05g3dte14","country_code":"US","type":"education","lineage":["https://openalex.org/I103163165"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Robert A. van Engelen","raw_affiliation_strings":["Florida State University, Tallahassee, FL, USA","Florida State University , Tallahassee , FL , USA"],"affiliations":[{"raw_affiliation_string":"Florida State University, Tallahassee, FL, USA","institution_ids":["https://openalex.org/I103163165"]},{"raw_affiliation_string":"Florida State University , Tallahassee , FL , USA","institution_ids":["https://openalex.org/I103163165"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5074125036"],"corresponding_institution_ids":["https://openalex.org/I103163165"],"apc_list":null,"apc_paid":null,"fwci":0.3444,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.69911504,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"245","last_page":"255"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10481","display_name":"Computer Graphics and Visualization Techniques","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1704","display_name":"Computer Graphics and Computer-Aided Design"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.8695837259292603},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7558096647262573},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.7482686042785645},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6645275354385376},{"id":"https://openalex.org/keywords/floating-point","display_name":"Floating point","score":0.6354044675827026},{"id":"https://openalex.org/keywords/grid","display_name":"Grid","score":0.6071958541870117},{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.553516149520874},{"id":"https://openalex.org/keywords/vector-processor","display_name":"Vector processor","score":0.47441479563713074},{"id":"https://openalex.org/keywords/decoupling","display_name":"Decoupling (probability)","score":0.4721280038356781},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.45790693163871765},{"id":"https://openalex.org/keywords/vectorization","display_name":"Vectorization (mathematics)","score":0.45333796739578247},{"id":"https://openalex.org/keywords/scalar","display_name":"Scalar (mathematics)","score":0.43954598903656006},{"id":"https://openalex.org/keywords/euclidean-vector","display_name":"Euclidean vector","score":0.4194839894771576},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.404536634683609},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.1792510449886322},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.16889280080795288},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1663133203983307},{"id":"https://openalex.org/keywords/geometry","display_name":"Geometry","score":0.09847810864448547}],"concepts":[{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.8695837259292603},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7558096647262573},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.7482686042785645},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6645275354385376},{"id":"https://openalex.org/C84211073","wikidata":"https://www.wikidata.org/wiki/Q117879","display_name":"Floating point","level":2,"score":0.6354044675827026},{"id":"https://openalex.org/C187691185","wikidata":"https://www.wikidata.org/wiki/Q2020720","display_name":"Grid","level":2,"score":0.6071958541870117},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.553516149520874},{"id":"https://openalex.org/C161824985","wikidata":"https://www.wikidata.org/wiki/Q919509","display_name":"Vector processor","level":2,"score":0.47441479563713074},{"id":"https://openalex.org/C205606062","wikidata":"https://www.wikidata.org/wiki/Q5249645","display_name":"Decoupling (probability)","level":2,"score":0.4721280038356781},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.45790693163871765},{"id":"https://openalex.org/C41681595","wikidata":"https://www.wikidata.org/wiki/Q7917855","display_name":"Vectorization (mathematics)","level":2,"score":0.45333796739578247},{"id":"https://openalex.org/C57691317","wikidata":"https://www.wikidata.org/wiki/Q1289248","display_name":"Scalar (mathematics)","level":2,"score":0.43954598903656006},{"id":"https://openalex.org/C118965365","wikidata":"https://www.wikidata.org/wiki/Q44528","display_name":"Euclidean vector","level":2,"score":0.4194839894771576},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.404536634683609},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.1792510449886322},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.16889280080795288},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1663133203983307},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.09847810864448547},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C133731056","wikidata":"https://www.wikidata.org/wiki/Q4917288","display_name":"Control engineering","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/1375527.1375564","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1375527.1375564","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 22nd annual international conference on Supercomputing","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.297.7517","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.297.7517","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cs.fsu.edu/~engelen/fp139-shou.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W27956500","https://openalex.org/W90801636","https://openalex.org/W1491178396","https://openalex.org/W1494930385","https://openalex.org/W1498163379","https://openalex.org/W1500117749","https://openalex.org/W1573046885","https://openalex.org/W1997483018","https://openalex.org/W2008398511","https://openalex.org/W2046105247","https://openalex.org/W2047937237","https://openalex.org/W2065854195","https://openalex.org/W2115452265","https://openalex.org/W2141493298","https://openalex.org/W2148597203","https://openalex.org/W2296760900","https://openalex.org/W6629325410"],"related_works":["https://openalex.org/W2566637483","https://openalex.org/W2127324789","https://openalex.org/W2368918171","https://openalex.org/W881027429","https://openalex.org/W2085171150","https://openalex.org/W4287322835","https://openalex.org/W2787898679","https://openalex.org/W2037577547","https://openalex.org/W32529763","https://openalex.org/W2069605723"],"abstract_inverted_index":{"Many":[0],"computational":[1],"tasks":[2],"require":[3,60],"repeated":[4],"evaluation":[5],"of":[6,19,52,57,110,128,162,180],"functions":[7,59,106,130],"over":[8,47,166],"structured":[9],"grids,":[10],"such":[11],"as":[12],"plotting":[13],"in":[14,22,119,132],"a":[15,37,95,158],"coordinate":[16],"system,":[17],"rendering":[18],"parametric":[20],"objects":[21],"2D":[23],"and":[24,29,39,134,147,168,193],"3D,":[25],"numerical":[26],"grid":[27,66],"generation,":[28],"signal":[30],"processing.":[31],"In":[32],"this":[33,91],"paper,":[34],"we":[35,93],"present":[36,70],"method":[38,98,165],"toolset":[40],"to":[41,63,79,86,103,141,176],"speed":[42,175],"up":[43],"closed-form":[44,58],"function":[45],"evaluations":[46],"grids":[48],"by":[49],"vectorizing":[50],"Chains":[51,109],"Recurrences":[53,111],"(CR).":[54],"CR":[55,71,74,101],"forms":[56,75,138],"fewer":[61],"operations":[62],"evaluate":[64],"per":[65],"point.":[67],"However,":[68],"the":[69,80,87,100,142,148,173],"formalism":[72],"makes":[73],"inherently":[76],"non-vectorizable":[77],"due":[78],"dependences":[81],"carried":[82],"from":[83,171],"one":[84],"point":[85],"next.":[88],"To":[89],"address":[90],"limitation,":[92],"developed":[94,189],"new":[96],"decoupling":[97],"for":[99,123,186,190],"algebra":[102],"translate":[104],"math":[105,152],"into":[107],"Vector":[108],"(VCR)":[112],"forms.":[113],"The":[114,155],"VCR":[115,137,164,187],"coefficients":[116],"are":[117,139],"packed":[118],"short":[120],"vector":[121,151],"registers":[122],"efficient":[124],"execution.":[125],"Performance":[126],"results":[127,156],"benchmark":[129],"evaluated":[131],"single":[133],"double":[135],"precision":[136],"compared":[140],"Intel":[143],"compiler's":[144],"auto-vectorized":[145],"code":[146],"high-performance":[149],"small":[150],"library":[153],"(SVML).":[154],"show":[157],"significant":[159],"performance":[160,192],"increase":[161],"our":[163],"SVML":[167],"scalar":[169],"CRs,":[170],"doubling":[172],"execution":[174],"running":[177],"an":[178],"order":[179],"magnitude":[181],"faster.":[182],"An":[183],"auto-tuning":[184],"tool":[185],"is":[188],"optimal":[191],"accuracy.":[194]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
