{"id":"https://openalex.org/W1986704057","doi":"https://doi.org/10.1109/hpca.2010.5416631","title":"SIF: Overcoming the limitations of SIMD devices via implicit permutation","display_name":"SIF: Overcoming the limitations of SIMD devices via implicit permutation","publication_year":2010,"publication_date":"2010-01-01","ids":{"openalex":"https://openalex.org/W1986704057","doi":"https://doi.org/10.1109/hpca.2010.5416631","mag":"1986704057"},"language":"en","primary_location":{"id":"doi:10.1109/hpca.2010.5416631","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca.2010.5416631","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"HPCA - 16 2010 The Sixteenth International Symposium on High-Performance Computer Architecture","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5044840341","display_name":"Libo Huang","orcid":"https://orcid.org/0000-0001-7878-3998"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Libo Huang","raw_affiliation_strings":["School of Computer, National University of Defense Technology, China","[School of Computer, National University of Defense Technology, China.]"],"affiliations":[{"raw_affiliation_string":"School of Computer, National University of Defense Technology, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"[School of Computer, National University of Defense Technology, China.]","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101790620","display_name":"Li Shen","orcid":"https://orcid.org/0000-0001-9043-2998"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Li Shen","raw_affiliation_strings":["School of Computer, National University of Defense Technology, China","[School of Computer, National University of Defense Technology, China.]"],"affiliations":[{"raw_affiliation_string":"School of Computer, National University of Defense Technology, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"[School of Computer, National University of Defense Technology, China.]","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047295889","display_name":"Zhiying Wang","orcid":"https://orcid.org/0000-0003-3339-3085"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiying Wang","raw_affiliation_strings":["School of Computer, National University of Defense Technology, China","[School of Computer, National University of Defense Technology, China.]"],"affiliations":[{"raw_affiliation_string":"School of Computer, National University of Defense Technology, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"[School of Computer, National University of Defense Technology, China.]","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102027515","display_name":"Wei Shi","orcid":"https://orcid.org/0009-0001-5926-3441"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Shi","raw_affiliation_strings":["School of Computer, National University of Defense Technology, China","[School of Computer, National University of Defense Technology, China.]"],"affiliations":[{"raw_affiliation_string":"School of Computer, National University of Defense Technology, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"[School of Computer, National University of Defense Technology, China.]","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023506057","display_name":"Nong Xiao","orcid":"https://orcid.org/0000-0002-2166-977X"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Nong Xiao","raw_affiliation_strings":["School of Computer, National University of Defense Technology, China","[School of Computer, National University of Defense Technology, China.]"],"affiliations":[{"raw_affiliation_string":"School of Computer, National University of Defense Technology, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"[School of Computer, National University of Defense Technology, China.]","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100760813","display_name":"Sheng Ma","orcid":"https://orcid.org/0000-0003-1710-4060"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sheng Ma","raw_affiliation_strings":["School of Computer, National University of Defense Technology, China","[School of Computer, National University of Defense Technology, China.]"],"affiliations":[{"raw_affiliation_string":"School of Computer, National University of Defense Technology, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"[School of Computer, National University of Defense Technology, China.]","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5044840341"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":1.7478,"has_fulltext":false,"cited_by_count":17,"citation_normalized_percentile":{"value":0.85111304,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"12"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.912650465965271},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8556246757507324},{"id":"https://openalex.org/keywords/datapath","display_name":"Datapath","score":0.8266151547431946},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7016242146492004},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.5383400321006775},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.518110454082489},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.5079520344734192},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.5075980424880981},{"id":"https://openalex.org/keywords/permutation","display_name":"Permutation (music)","score":0.4704921245574951},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.15658313035964966}],"concepts":[{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.912650465965271},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8556246757507324},{"id":"https://openalex.org/C2781198647","wikidata":"https://www.wikidata.org/wiki/Q1633673","display_name":"Datapath","level":2,"score":0.8266151547431946},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7016242146492004},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.5383400321006775},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.518110454082489},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.5079520344734192},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.5075980424880981},{"id":"https://openalex.org/C21308566","wikidata":"https://www.wikidata.org/wiki/Q7169365","display_name":"Permutation (music)","level":2,"score":0.4704921245574951},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.15658313035964966},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpca.2010.5416631","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca.2010.5416631","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"HPCA - 16 2010 The Sixteenth International Symposium on High-Performance Computer Architecture","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W1531065716","https://openalex.org/W1555915743","https://openalex.org/W1965638741","https://openalex.org/W1966324811","https://openalex.org/W1976158744","https://openalex.org/W1984588379","https://openalex.org/W1998249532","https://openalex.org/W2032094184","https://openalex.org/W2051598123","https://openalex.org/W2052129173","https://openalex.org/W2069703635","https://openalex.org/W2099404643","https://openalex.org/W2100579514","https://openalex.org/W2105012172","https://openalex.org/W2108824541","https://openalex.org/W2111394443","https://openalex.org/W2111659605","https://openalex.org/W2112980698","https://openalex.org/W2117285153","https://openalex.org/W2118533539","https://openalex.org/W2118688057","https://openalex.org/W2120585153","https://openalex.org/W2121711421","https://openalex.org/W2130048397","https://openalex.org/W2132020380","https://openalex.org/W2132587889","https://openalex.org/W2137249790","https://openalex.org/W2150243807","https://openalex.org/W2156250610","https://openalex.org/W2167639788","https://openalex.org/W2169150396","https://openalex.org/W2725179571","https://openalex.org/W4242539338","https://openalex.org/W4244897181","https://openalex.org/W4245987756","https://openalex.org/W4248145683","https://openalex.org/W4254476753","https://openalex.org/W4254558528","https://openalex.org/W6633127185","https://openalex.org/W6641587366","https://openalex.org/W6675526518","https://openalex.org/W6678014108"],"related_works":["https://openalex.org/W2382510858","https://openalex.org/W2479014312","https://openalex.org/W1583465708","https://openalex.org/W2053732522","https://openalex.org/W2009882312","https://openalex.org/W2133317661","https://openalex.org/W2000051442","https://openalex.org/W1585350690","https://openalex.org/W2571578228","https://openalex.org/W2123803416"],"abstract_inverted_index":{"SIMD":[0,28,47,81,124,133,153,166,203],"devices":[1,29],"have":[2],"gained":[3],"widespread":[4],"acceptance":[5],"in":[6,30,80,91,102,123,163,172],"modern":[7],"microprocessor":[8],"designs":[9],"for":[10,14],"their":[11],"superior":[12],"performance":[13,192],"multimedia":[15],"applications.":[16],"However,":[17],"there":[18],"are":[19],"three":[20,53,110],"remaining":[21],"limitations":[22,111],"to":[23,75],"the":[24,86,107,119,130,142,183],"efficient":[25,46],"utilization":[26],"of":[27,109,144,182],"general-purpose":[31],"computer":[32],"systems:":[33],"memory":[34],"alignment,":[35],"data":[36],"reorganization":[37],"and":[38,69,176,190],"control":[39],"flow.":[40],"This":[41],"paper":[42],"presents":[43],"SIF,":[44],"an":[45],"interface":[48],"framework":[49],"that":[50,128,180],"addresses":[51],"these":[52,170],"shortcomings":[54],"without":[55],"modifying":[56],"existing":[57,165],"ISA.":[58],"It":[59],"is":[60,138,198],"designed":[61],"around":[62],"a":[63,126,173],"permutation":[64,78,87,96,114,184],"vector":[65],"register":[66],"file":[67],"(PVRF)":[68],"it":[70,160],"adds":[71],"new":[72],"extended":[73],"instructions":[74,122,186],"set":[76],"internal":[77],"state":[79,88,120],"datapath":[82],"rather":[83],"than":[84,201],"putting":[85],"setting":[89,121],"bits":[90],"every":[92],"instruction.":[93],"The":[94],"implicit":[95],"capability":[97],"provided":[98],"by":[99,112],"PVRF":[100],"results":[101,178],"zero":[103],"overhead,":[104],"which":[105,158,197],"frees":[106],"handling":[108],"using":[113],"instructions.":[115],"To":[116],"further":[117],"reduce":[118],"datapath,":[125],"technique":[127],"moves":[129],"workloads":[131],"from":[132],"pipeline":[134,137],"into":[135,155],"scalar":[136],"also":[139],"introduced.":[140],"With":[141],"help":[143],"proposed":[145],"compilation":[146],"algorithm,":[147],"SIF":[148,156],"can":[149,187,194],"efficiently":[150],"transform":[151],"regular":[152],"codes":[154,157],"make":[159],"easily":[161],"integrated":[162],"all":[164],"devices.":[167],"We":[168],"implemented":[169],"techniques":[171,204],"vectorizing":[174],"compiler":[175],"experimental":[177],"show":[179],"most":[181],"overhead":[185],"be":[188,195],"eliminated":[189],"distinct":[191],"speedup":[193],"achieved,":[196],"37%":[199],"higher":[200],"current":[202],"on":[205],"average.":[206]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":2},{"year":2013,"cited_by_count":3},{"year":2012,"cited_by_count":1}],"updated_date":"2026-01-09T23:09:53.351390","created_date":"2025-10-10T00:00:00"}
