{"id":"https://openalex.org/W2788774572","doi":"https://doi.org/10.1145/3178433.3178441","title":"Vectorization of a spectral finite-element numerical kernel","display_name":"Vectorization of a spectral finite-element numerical kernel","publication_year":2018,"publication_date":"2018-02-16","ids":{"openalex":"https://openalex.org/W2788774572","doi":"https://doi.org/10.1145/3178433.3178441","mag":"2788774572"},"language":"en","primary_location":{"id":"doi:10.1145/3178433.3178441","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3178433.3178441","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 4th Workshop on Programming Models for SIMD/Vector Processing","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068621969","display_name":"Sylvain Jubertie","orcid":null},"institutions":[{"id":"https://openalex.org/I12449238","display_name":"Universit\u00e9 d'Orl\u00e9ans","ror":"https://ror.org/014zrew76","country_code":"FR","type":"education","lineage":["https://openalex.org/I12449238"]},{"id":"https://openalex.org/I4210143826","display_name":"Institut National des Sciences Appliqu\u00e9es Centre Val de Loire","ror":"https://ror.org/03y0qc033","country_code":"FR","type":"education","lineage":["https://openalex.org/I4210143826"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Sylvain Jubertie","raw_affiliation_strings":["LIFO EA, Univ. of Orl\u00e9ans, INSA CVL, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"LIFO EA, Univ. of Orl\u00e9ans, INSA CVL, France","institution_ids":["https://openalex.org/I12449238","https://openalex.org/I4210143826"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042957624","display_name":"Fabrice Dupros","orcid":"https://orcid.org/0000-0002-1069-9949"},"institutions":[{"id":"https://openalex.org/I4210158893","display_name":"Bureau de Recherches G\u00e9ologiques et Mini\u00e8res","ror":"https://ror.org/05hnb7x64","country_code":"FR","type":"government","lineage":["https://openalex.org/I4210158893"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Fabrice Dupros","raw_affiliation_strings":["BRGM, Orl\u00e9ans, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"BRGM, Orl\u00e9ans, France","institution_ids":["https://openalex.org/I4210158893"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5061289747","display_name":"Florent de Martin","orcid":"https://orcid.org/0000-0003-3746-9067"},"institutions":[{"id":"https://openalex.org/I4210158893","display_name":"Bureau de Recherches G\u00e9ologiques et Mini\u00e8res","ror":"https://ror.org/05hnb7x64","country_code":"FR","type":"government","lineage":["https://openalex.org/I4210158893"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Florent De Martin","raw_affiliation_strings":["BRGM, Orl\u00e9ans, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"BRGM, Orl\u00e9ans, France","institution_ids":["https://openalex.org/I4210158893"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.6245,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.63902806,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10339","display_name":"Advanced Numerical Methods in Computational Mathematics","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10339","display_name":"Advanced Numerical Methods in Computational Mathematics","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10271","display_name":"Seismic Imaging and Inversion Techniques","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1908","display_name":"Geophysics"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11416","display_name":"Numerical methods for differential equations","score":0.9764000177383423,"subfield":{"id":"https://openalex.org/subfields/2612","display_name":"Numerical Analysis"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.8402047157287598},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8262892365455627},{"id":"https://openalex.org/keywords/vectorization","display_name":"Vectorization (mathematics)","score":0.8209828734397888},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7991230487823486},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.7767850160598755},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.619137167930603},{"id":"https://openalex.org/keywords/finite-element-method","display_name":"Finite element method","score":0.5732461214065552},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.49312683939933777},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.4789203405380249},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.13749486207962036},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.07446298003196716}],"concepts":[{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.8402047157287598},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8262892365455627},{"id":"https://openalex.org/C41681595","wikidata":"https://www.wikidata.org/wiki/Q7917855","display_name":"Vectorization (mathematics)","level":2,"score":0.8209828734397888},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7991230487823486},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.7767850160598755},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.619137167930603},{"id":"https://openalex.org/C135628077","wikidata":"https://www.wikidata.org/wiki/Q220184","display_name":"Finite element method","level":2,"score":0.5732461214065552},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.49312683939933777},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.4789203405380249},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.13749486207962036},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.07446298003196716},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3178433.3178441","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3178433.3178441","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 4th Workshop on Programming Models for SIMD/Vector Processing","raw_type":"proceedings-article"},{"id":"pmh:oai:HAL:hal-01835745v1","is_oa":false,"landing_page_url":"https://hal.science/hal-01835745","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"WPMVP 2018, Feb 2018, Vienna, France. &#x27E8;10.1145/3178433.3178441&#x27E9;","raw_type":"Conference papers"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W170562765","https://openalex.org/W567646997","https://openalex.org/W1545372130","https://openalex.org/W1621885647","https://openalex.org/W1918634531","https://openalex.org/W1934367655","https://openalex.org/W1976110206","https://openalex.org/W1980140364","https://openalex.org/W2003343545","https://openalex.org/W2018225490","https://openalex.org/W2032090095","https://openalex.org/W2047352485","https://openalex.org/W2068052918","https://openalex.org/W2077368913","https://openalex.org/W2079728181","https://openalex.org/W2095420020","https://openalex.org/W2098267405","https://openalex.org/W2103527393","https://openalex.org/W2109625332","https://openalex.org/W2139871964","https://openalex.org/W2268800088","https://openalex.org/W2313756482","https://openalex.org/W2566915031","https://openalex.org/W2578190765","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W32529763","https://openalex.org/W2566637483","https://openalex.org/W2127324789","https://openalex.org/W3024308452","https://openalex.org/W4244894488","https://openalex.org/W4285390450","https://openalex.org/W2979513934","https://openalex.org/W2111180768","https://openalex.org/W2366442643","https://openalex.org/W2021715972"],"abstract_inverted_index":{"In":[0,27],"this":[1,28],"paper,":[2],"we":[3,132,171],"present":[4],"an":[5,95],"optimized":[6],"implementation":[7],"of":[8,23,77,182],"the":[9,21,30,33,43,49,54,58,80,89,112,118,124,153,166,178,183],"Finite-Element":[10],"Methods":[11],"numerical":[12],"kernel":[13],"for":[14,68,74,102],"SIMD":[15,160,184,202],"vectorization.":[16],"A":[17],"typical":[18],"application":[19,99],"is":[20,64,85],"modelling":[22],"seismic":[24],"wave":[25],"propagation.":[26],"case,":[29],"computations":[31],"at":[32,79],"element":[34,55],"level":[35,56,60],"are":[36,46,145,187],"generally":[37],"based":[38],"on":[39,94,190],"nested":[40,154],"loops":[41,155],"where":[42],"memory":[44,82,168],"accesses":[45],"non-contiguous.":[47],"Moreover,":[48],"back":[50],"and":[51,73,142,193,200],"forth":[52],"from":[53,107],"to":[57,116,147,165,176,214,217],"global":[59],"(e.g.,":[61],"assembly":[62],"phase)":[63],"a":[65,173],"serious":[66],"brake":[67],"automatic":[69,149],"vectorization":[70,150,208],"by":[71],"compilers":[72,136],"efficient":[75],"reuse":[76],"data":[78],"cache":[81],"levels.":[83],"This":[84],"particularly":[86],"true":[87],"when":[88,152,159],"problem":[90],"under":[91],"study":[92],"relies":[93],"unstructured":[96],"mesh.":[97],"The":[98],"proxies":[100],"used":[101],"our":[103,207],"experiments":[104],"were":[105,156,162],"extracted":[106],"EFISPEC":[108],"code":[109],"that":[110,123,134,196,206],"implements":[111],"spectral":[113],"finite-element":[114],"method":[115],"solve":[117],"elastodynamic":[119],"equations.":[120],"We":[121,204],"underline":[122],"intra-node":[125],"performance":[126,180],"may":[127,210],"be":[128,211,215],"further":[129],"improved.":[130],"Additionally,":[131],"show":[133],"standard":[135],"such":[137],"as":[138],"GNU":[139],"GCC,":[140],"Clang":[141],"Intel":[143,191],"ICC":[144],"unable":[146],"perform":[148],"even":[151],"reorganized":[157],"or":[158],"pragmas":[161],"added.":[163],"Due":[164],"irregular":[167],"access":[169],"pattern,":[170],"introduce":[172],"dedicated":[174],"strategy":[175],"squeeze":[177],"maximum":[179],"out":[181,189],"units.":[185,203],"Experiments":[186],"carried":[188],"Broadwell":[192],"Skylake":[194],"platforms":[195],"respectively":[197],"offer":[198],"AVX2":[199],"AVX-512":[201],"believe":[205],"approach":[209],"generic":[212],"enough":[213],"adapted":[216],"other":[218],"codes.":[219]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2018,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
