{"id":"https://openalex.org/W4224277975","doi":"https://doi.org/10.1145/3528425.3529100","title":"Exploring source-to-source compiler transformation of OpenMP SIMD constructs for Intel AVX and Arm SVE vector architectures","display_name":"Exploring source-to-source compiler transformation of OpenMP SIMD constructs for Intel AVX and Arm SVE vector architectures","publication_year":2022,"publication_date":"2022-04-02","ids":{"openalex":"https://openalex.org/W4224277975","doi":"https://doi.org/10.1145/3528425.3529100"},"language":"en","primary_location":{"id":"doi:10.1145/3528425.3529100","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3528425.3529100","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirteenth International Workshop on Programming Models and Applications for Multicores and Manycores","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5039987576","display_name":"Patrick J. Flynn","orcid":"https://orcid.org/0000-0002-5446-114X"},"institutions":[{"id":"https://openalex.org/I102149020","display_name":"University of North Carolina at Charlotte","ror":"https://ror.org/04dawnj30","country_code":"US","type":"education","lineage":["https://openalex.org/I102149020"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Patrick Flynn","raw_affiliation_strings":["University of North Carolina at Charlotte"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of North Carolina at Charlotte","institution_ids":["https://openalex.org/I102149020"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059056563","display_name":"Xinyao Yi","orcid":"https://orcid.org/0000-0002-6614-1274"},"institutions":[{"id":"https://openalex.org/I102149020","display_name":"University of North Carolina at Charlotte","ror":"https://ror.org/04dawnj30","country_code":"US","type":"education","lineage":["https://openalex.org/I102149020"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xinyao Yi","raw_affiliation_strings":["University of North Carolina at Charlotte"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of North Carolina at Charlotte","institution_ids":["https://openalex.org/I102149020"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102715061","display_name":"Yonghong Yan","orcid":"https://orcid.org/0000-0002-5274-8526"},"institutions":[{"id":"https://openalex.org/I102149020","display_name":"University of North Carolina at Charlotte","ror":"https://ror.org/04dawnj30","country_code":"US","type":"education","lineage":["https://openalex.org/I102149020"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yonghong Yan","raw_affiliation_strings":["University of North Carolina at Charlotte"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of North Carolina at Charlotte","institution_ids":["https://openalex.org/I102149020"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.3328,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.87419355,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"11","last_page":"20"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.8836968541145325},{"id":"https://openalex.org/keywords/vectorization","display_name":"Vectorization (mathematics)","score":0.8124700784683228},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7901015281677246},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7599632740020752},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.7451682686805725},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.5978071689605713},{"id":"https://openalex.org/keywords/vector-processor","display_name":"Vector processor","score":0.42359495162963867},{"id":"https://openalex.org/keywords/instruction-set","display_name":"Instruction set","score":0.41588079929351807},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.24114751815795898}],"concepts":[{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.8836968541145325},{"id":"https://openalex.org/C41681595","wikidata":"https://www.wikidata.org/wiki/Q7917855","display_name":"Vectorization (mathematics)","level":2,"score":0.8124700784683228},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7901015281677246},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7599632740020752},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.7451682686805725},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.5978071689605713},{"id":"https://openalex.org/C161824985","wikidata":"https://www.wikidata.org/wiki/Q919509","display_name":"Vector processor","level":2,"score":0.42359495162963867},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.41588079929351807},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.24114751815795898}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3528425.3529100","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3528425.3529100","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirteenth International Workshop on Programming Models and Applications for Multicores and Manycores","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7566237621","display_name":null,"funder_award_id":"2015254","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W92174080","https://openalex.org/W2118031182","https://openalex.org/W2127324789","https://openalex.org/W2136991556","https://openalex.org/W2519929754","https://openalex.org/W2623389408","https://openalex.org/W2734941459","https://openalex.org/W2744708049","https://openalex.org/W2887161653","https://openalex.org/W3008788679","https://openalex.org/W3086504363"],"related_works":["https://openalex.org/W2566637483","https://openalex.org/W32529763","https://openalex.org/W2368918171","https://openalex.org/W881027429","https://openalex.org/W2085171150","https://openalex.org/W2106561276","https://openalex.org/W2413558124","https://openalex.org/W2787898679","https://openalex.org/W4287322835","https://openalex.org/W4285302443"],"abstract_inverted_index":{"Over":[0,55],"the":[1,43,56,74,77,81,98,120,126,162],"past":[2,57],"decade,":[3],"SIMD":[4,46,85,122],"(single":[5],"instruction":[6],"multiple":[7],"data)":[8],"or":[9],"vector":[10,140],"architectures":[11],"have":[12],"made":[13,62],"significant":[14,63],"advances,":[15],"now":[16],"existing":[17],"across":[18],"a":[19,129],"wide":[20],"range":[21],"of":[22,42,76,111,115,128,158],"devices":[23],"from":[24,53],"commodity":[25,49],"CPUs":[26,52],"to":[27,48,136,153],"high":[28],"performance":[29,145],"computing":[30],"(HPC)":[31],"cores.":[32],"Intel's":[33],"AVX":[34,137,149],"(Advanced":[35],"Vector":[36,70],"Extensions)":[37],"architecture":[38,102],"has":[39,61,86,94],"been":[40],"one":[41],"most":[44],"popular":[45],"extensions":[47],"and":[50,90,138,150,166],"HPC":[51],"Intel.":[54],"few":[58],"years,":[59],"Arm":[60,151],"inroads":[64],"with":[65],"its":[66],"new":[67],"SVE":[68,139,152],"(Scalable":[69],"Extension),":[71],"used":[72],"in":[73],"supercomputer":[75],"top":[78],"place":[79],"on":[80,147],"Top500":[82],"list.":[83],"As":[84],"become":[87,95],"more":[88,91],"advanced":[89],"important,":[92],"it":[93],"equally":[96],"important":[97],"compilers":[99],"support":[100],"these":[101],"extensions.":[103],"In":[104],"this":[105,156],"paper,":[106],"we":[107,143],"present":[108,125],"our":[109],"approach":[110],"source-to-source":[112],"compiler":[113],"transformation":[114],"explicit":[116],"vectorization":[117,159],"constructs":[118],"using":[119],"OpenMP":[121],"directive.":[123],"We":[124],"design":[127],"unified":[130],"IR":[131],"that":[132],"is":[133],"easily":[134],"translated":[135],"architectures.":[141],"Finally,":[142],"conduct":[144],"evaluations":[146],"Intel":[148],"demonstrate":[154],"how":[155],"method":[157],"can":[160],"bridge":[161],"gap":[163],"between":[164],"auto-":[165],"manual-":[167],"vectorization.":[168]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":6}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
