{"id":"https://openalex.org/W2033175061","doi":"https://doi.org/10.1093/comjnl/bxt049","title":"Efficient Utilization of SIMD Engines for General-Purpose Processors","display_name":"Efficient Utilization of SIMD Engines for General-Purpose Processors","publication_year":2013,"publication_date":"2013-05-31","ids":{"openalex":"https://openalex.org/W2033175061","doi":"https://doi.org/10.1093/comjnl/bxt049","mag":"2033175061"},"language":"en","primary_location":{"id":"doi:10.1093/comjnl/bxt049","is_oa":false,"landing_page_url":"https://doi.org/10.1093/comjnl/bxt049","pdf_url":null,"source":{"id":"https://openalex.org/S44643521","display_name":"The Computer Journal","issn_l":"0010-4620","issn":["0010-4620","1460-2067"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The Computer Journal","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002716581","display_name":"Lixing Huang","orcid":"https://orcid.org/0000-0001-8650-715X"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"L. Huang","raw_affiliation_strings":["State Key Laboratory of High Performance Computing, National University of Defense Technology, Changsha 410073, China","School of Computer, National University of Defense Technology, Changsha 410073, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of High Performance Computing, National University of Defense Technology, Changsha 410073, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"School of Computer, National University of Defense Technology, Changsha 410073, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100441364","display_name":"Ziyi Wang","orcid":"https://orcid.org/0000-0001-8174-0593"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Z. Wang","raw_affiliation_strings":["School of Computer, National University of Defense Technology, Changsha 410073, China","State Key Laboratory of High Performance Computing, National University of Defense Technology, Changsha 410073, China"],"affiliations":[{"raw_affiliation_string":"School of Computer, National University of Defense Technology, Changsha 410073, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"State Key Laboratory of High Performance Computing, National University of Defense Technology, Changsha 410073, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023506057","display_name":"Nong Xiao","orcid":"https://orcid.org/0000-0002-2166-977X"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"N. Xiao","raw_affiliation_strings":["School of Computer, National University of Defense Technology, Changsha 410073, China","State Key Laboratory of High Performance Computing, National University of Defense Technology, Changsha 410073, China"],"affiliations":[{"raw_affiliation_string":"School of Computer, National University of Defense Technology, Changsha 410073, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"State Key Laboratory of High Performance Computing, National University of Defense Technology, Changsha 410073, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5090516040","display_name":"Qi Dou","orcid":"https://orcid.org/0000-0002-3416-9950"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Q. Dou","raw_affiliation_strings":["School of Computer, National University of Defense Technology, Changsha 410073, China"],"affiliations":[{"raw_affiliation_string":"School of Computer, National University of Defense Technology, Changsha 410073, China","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5002716581"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":{"value":2635,"currency":"GBP","value_usd":3232},"apc_paid":null,"fwci":0.3152,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.59816088,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"57","issue":"8","first_page":"1141","last_page":"1154"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.9568913578987122},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8730127215385437},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.6427088975906372},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6151494979858398},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.497144490480423},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4641808867454529},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.42621010541915894},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4150264263153076},{"id":"https://openalex.org/keywords/program-optimization","display_name":"Program optimization","score":0.41135379672050476},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.24622899293899536},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.20770347118377686}],"concepts":[{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.9568913578987122},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8730127215385437},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.6427088975906372},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6151494979858398},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.497144490480423},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4641808867454529},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.42621010541915894},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4150264263153076},{"id":"https://openalex.org/C139571649","wikidata":"https://www.wikidata.org/wiki/Q1156793","display_name":"Program optimization","level":3,"score":0.41135379672050476},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.24622899293899536},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.20770347118377686},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1093/comjnl/bxt049","is_oa":false,"landing_page_url":"https://doi.org/10.1093/comjnl/bxt049","pdf_url":null,"source":{"id":"https://openalex.org/S44643521","display_name":"The Computer Journal","issn_l":"0010-4620","issn":["0010-4620","1460-2067"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The Computer Journal","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7","score":0.7599999904632568}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2291920536","https://openalex.org/W2022397046","https://openalex.org/W2162726111","https://openalex.org/W1603583590","https://openalex.org/W338671845","https://openalex.org/W2286348849","https://openalex.org/W2018511057","https://openalex.org/W2478126787","https://openalex.org/W1587248296","https://openalex.org/W2063770303"],"abstract_inverted_index":{"This":[0,33],"paper":[1],"proposes":[2],"stream":[3,59],"model":[4],"execution":[5],"(SME),":[6],"a":[7,36,48,89],"new":[8],"architectural":[9],"technique":[10],"that":[11,113],"supports":[12],"the":[13,30,51,63,68,106],"efficient":[14,42],"utilization":[15],"of":[16,53,70,123],"single-instruction":[17],"multiple-data":[18],"(SIMD)":[19],"engines":[20,55],"on":[21],"general-purpose":[22],"processors.":[23],"SME":[24,96,107,114],"adopts":[25],"lightweight":[26],"streaming":[27],"processing":[28],"as":[29,74],"intermediate":[31],"representation.":[32],"process":[34],"uses":[35],"hardware\u2013software":[37],"co-design":[38],"approach":[39],"to":[40,61,66],"support":[41,60],"SIMD":[43,54,71],"compilation":[44],"and":[45,77,102,128],"utilization.":[46],"From":[47],"hardware":[49],"perspective,":[50],"micro-architecture":[52],"is":[56],"enabled":[57],"for":[58,125,130],"provide":[62],"key":[64],"components":[65],"address":[67],"limitations":[69],"utilization,":[72],"such":[73],"data":[75],"rearrangement":[76],"non-consecutive":[78],"memory":[79],"access.":[80],"Thus,":[81],"sequential":[82],"code":[83,94,98],"can":[84,115],"be":[85],"compiled":[86],"efficiently":[87],"through":[88],"two-step":[90],"procedure":[91],"(i.e.":[92],"C":[93,97],"\u2192":[95,99],"vectorized":[100],"code)":[101],"then":[103],"executed":[104],"by":[105,120],"hardware.":[108],"Our":[109],"experimental":[110],"evaluation":[111],"shows":[112],"outperform":[116],"previous":[117],"optimization":[118],"techniques":[119],"an":[121],"average":[122],"58%":[124],"multimedia":[126],"kernels":[127],"26%":[129],"applications.":[131]},"counts_by_year":[{"year":2016,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
