{"id":"https://openalex.org/W2520139353","doi":"https://doi.org/10.1109/hpcsim.2016.7568423","title":"Neat SIMD: Elegant vectorization in C++ by using specialized templates","display_name":"Neat SIMD: Elegant vectorization in C++ by using specialized templates","publication_year":2016,"publication_date":"2016-07-01","ids":{"openalex":"https://openalex.org/W2520139353","doi":"https://doi.org/10.1109/hpcsim.2016.7568423","mag":"2520139353"},"language":"en","primary_location":{"id":"doi:10.1109/hpcsim.2016.7568423","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpcsim.2016.7568423","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 International Conference on High Performance Computing &amp; Simulation (HPCS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5008400749","display_name":"Matthias Gro\u00df","orcid":"https://orcid.org/0000-0002-3716-7512"},"institutions":[{"id":"https://openalex.org/I99977706","display_name":"FH Aachen","ror":"https://ror.org/04tqgg260","country_code":"DE","type":"education","lineage":["https://openalex.org/I99977706"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Matthias Gross","raw_affiliation_strings":["Aachen Technology Center (AaTC), Aachen, Germany"],"affiliations":[{"raw_affiliation_string":"Aachen Technology Center (AaTC), Aachen, Germany","institution_ids":["https://openalex.org/I99977706"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5008400749"],"corresponding_institution_ids":["https://openalex.org/I99977706"],"apc_list":null,"apc_paid":null,"fwci":0.6307,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.67744969,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"848","last_page":"857"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.9730747938156128},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8388528823852539},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.7811006307601929},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7467347979545593},{"id":"https://openalex.org/keywords/porting","display_name":"Porting","score":0.6511629223823547},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.6209112405776978},{"id":"https://openalex.org/keywords/vectorization","display_name":"Vectorization (mathematics)","score":0.5080997943878174},{"id":"https://openalex.org/keywords/instruction-set","display_name":"Instruction set","score":0.4979102611541748},{"id":"https://openalex.org/keywords/code-generation","display_name":"Code generation","score":0.46204373240470886},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.37307271361351013},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.3204266130924225},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.2621356248855591},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.171433687210083}],"concepts":[{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.9730747938156128},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8388528823852539},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.7811006307601929},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7467347979545593},{"id":"https://openalex.org/C106251023","wikidata":"https://www.wikidata.org/wiki/Q851989","display_name":"Porting","level":3,"score":0.6511629223823547},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.6209112405776978},{"id":"https://openalex.org/C41681595","wikidata":"https://www.wikidata.org/wiki/Q7917855","display_name":"Vectorization (mathematics)","level":2,"score":0.5080997943878174},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.4979102611541748},{"id":"https://openalex.org/C133162039","wikidata":"https://www.wikidata.org/wiki/Q1061077","display_name":"Code generation","level":3,"score":0.46204373240470886},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.37307271361351013},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.3204266130924225},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.2621356248855591},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.171433687210083},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpcsim.2016.7568423","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpcsim.2016.7568423","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 International Conference on High Performance Computing &amp; Simulation (HPCS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W78313762","https://openalex.org/W1552556042","https://openalex.org/W2013156670","https://openalex.org/W2066162225","https://openalex.org/W2069504277","https://openalex.org/W2163000655","https://openalex.org/W3145750999","https://openalex.org/W4248145683","https://openalex.org/W6633204398"],"related_works":["https://openalex.org/W2566637483","https://openalex.org/W2127324789","https://openalex.org/W3024308452","https://openalex.org/W4244894488","https://openalex.org/W4285390450","https://openalex.org/W2966127030","https://openalex.org/W2366442643","https://openalex.org/W2090268225","https://openalex.org/W2021715972","https://openalex.org/W75461624"],"abstract_inverted_index":{"Most":[0],"of":[1,65,104],"today's":[2],"processors":[3],"provide":[4],"strong":[5],"support":[6],"for":[7,111,198],"single":[8],"instruction":[9,58,143],"multiple":[10,63],"data":[11],"(SIMD,":[12],"or":[13,145],"vector)":[14],"operations.":[15],"But":[16],"because":[17],"modern":[18],"compilers":[19],"often":[20],"fail":[21],"to":[22,56,69,108,123,132,140,146,192],"generate":[23],"optimal":[24],"SIMD":[25,29,38,77,88,115,125,128,196],"code,":[26],"explicitly":[27],"written":[28,162],"code":[30,39,67,107,167,182,187,197],"is":[31,47,52,137,161],"still":[32],"the":[33,185],"most":[34],"performant.":[35],"However,":[36],"native":[37,124],"has":[40,129],"some":[41],"major":[42],"disadvantages,":[43],"mainly":[44],"that":[45,50,61,99,164],"it":[46,51,138],"quite":[48],"cumbersome,":[49],"not":[53],"easily":[54,105],"ported":[55],"different":[57],"sets,":[59],"and":[60,80,114,157,169,190],"typically":[62],"versions":[64],"similar":[66],"have":[68],"be":[70,109],"written;":[71],"e.g.":[72],"a":[73,75,147],"scalar,":[74],"streaming":[76],"extensions":[78,84],"(SSE)":[79],"an":[81],"advanced":[82],"vector":[83],"(AVX)":[85],"version.":[86,150],"Neat":[87,127,195],"resolves":[89],"all":[90,165],"these":[91],"disadvantages":[92],"by":[93],"using":[94],"specialized":[95],"templates":[96],"in":[97,121],"C++":[98,149],"enable":[100],"one":[101],"neat":[102],"version":[103],"portable":[106],"used":[110],"both":[112],"scalar":[113],"instructions":[116],"without":[117],"sacrificing":[118],"any":[119,133,141],"performance":[120],"comparison":[122],"code.":[126],"no":[130],"dependencies":[131],"external":[134],"library":[135],"nor":[136],"restricted":[139],"specific":[142],"set":[144],"newer":[148],"The":[151],"examples":[152],"shown":[153],"here":[154],"use":[155,180,193],"SSE":[156],"AVX.":[158],"This":[159],"publication":[160],"such":[163],"crucial":[166],"elements":[168],"implementation":[170],"hints":[171],"are":[172],"given":[173],"here.":[174],"An":[175],"interested":[176],"software":[177],"engineer":[178],"can":[179],"those":[181],"elements,":[183],"reconstruct":[184],"remaining":[186],"with":[188],"ease,":[189],"start":[191],"effective":[194],"own":[199],"purposes.":[200]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2019,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
