{"id":"https://openalex.org/W2040281526","doi":"https://doi.org/10.1145/2145816.2145824","title":"Efficient SIMD code generation for irregular kernels","display_name":"Efficient SIMD code generation for irregular kernels","publication_year":2012,"publication_date":"2012-02-25","ids":{"openalex":"https://openalex.org/W2040281526","doi":"https://doi.org/10.1145/2145816.2145824","mag":"2040281526"},"language":"en","primary_location":{"id":"doi:10.1145/2145816.2145824","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2145816.2145824","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 17th ACM SIGPLAN symposium on Principles and Practice of Parallel Programming","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5064270590","display_name":"Seonggun Kim","orcid":null},"institutions":[{"id":"https://openalex.org/I2250650973","display_name":"Samsung (South Korea)","ror":"https://ror.org/04w3jy968","country_code":"KR","type":"company","lineage":["https://openalex.org/I2250650973"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Seonggun Kim","raw_affiliation_strings":["Samsung Advanced Institute of Technology, Yongin, South Korea","[Samsung Advanced Institute of Technology, Yongin, South Korea]"],"affiliations":[{"raw_affiliation_string":"Samsung Advanced Institute of Technology, Yongin, South Korea","institution_ids":["https://openalex.org/I2250650973"]},{"raw_affiliation_string":"[Samsung Advanced Institute of Technology, Yongin, South Korea]","institution_ids":["https://openalex.org/I2250650973"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5035561846","display_name":"Hwansoo Han","orcid":"https://orcid.org/0000-0001-7182-8452"},"institutions":[{"id":"https://openalex.org/I848706","display_name":"Sungkyunkwan University","ror":"https://ror.org/04q78tk20","country_code":"KR","type":"education","lineage":["https://openalex.org/I848706"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hwansoo Han","raw_affiliation_strings":["Sungkyunkwan University, Suwon, South Korea"],"affiliations":[{"raw_affiliation_string":"Sungkyunkwan University, Suwon, South Korea","institution_ids":["https://openalex.org/I848706"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5064270590"],"corresponding_institution_ids":["https://openalex.org/I2250650973"],"apc_list":null,"apc_paid":null,"fwci":4.3509,"has_fulltext":false,"cited_by_count":38,"citation_normalized_percentile":{"value":0.94519393,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"55","last_page":"64"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.9581612348556519},{"id":"https://openalex.org/keywords/indirection","display_name":"Indirection","score":0.9021494388580322},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.850565493106842},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.8074479103088379},{"id":"https://openalex.org/keywords/vectorization","display_name":"Vectorization (mathematics)","score":0.7097872495651245},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.6863776445388794},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5399899482727051},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.5143135786056519},{"id":"https://openalex.org/keywords/operand","display_name":"Operand","score":0.42679280042648315},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.14460432529449463},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.13438096642494202}],"concepts":[{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.9581612348556519},{"id":"https://openalex.org/C89377073","wikidata":"https://www.wikidata.org/wiki/Q1171224","display_name":"Indirection","level":2,"score":0.9021494388580322},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.850565493106842},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.8074479103088379},{"id":"https://openalex.org/C41681595","wikidata":"https://www.wikidata.org/wiki/Q7917855","display_name":"Vectorization (mathematics)","level":2,"score":0.7097872495651245},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.6863776445388794},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5399899482727051},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.5143135786056519},{"id":"https://openalex.org/C55526617","wikidata":"https://www.wikidata.org/wiki/Q719375","display_name":"Operand","level":2,"score":0.42679280042648315},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.14460432529449463},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.13438096642494202},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2145816.2145824","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2145816.2145824","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 17th ACM SIGPLAN symposium on Principles and Practice of Parallel Programming","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W114095935","https://openalex.org/W1494930385","https://openalex.org/W1591319746","https://openalex.org/W1837653564","https://openalex.org/W1856176155","https://openalex.org/W1981772986","https://openalex.org/W2005343162","https://openalex.org/W2022711417","https://openalex.org/W2036853599","https://openalex.org/W2051598123","https://openalex.org/W2069703635","https://openalex.org/W2099404643","https://openalex.org/W2104378884","https://openalex.org/W2104508714","https://openalex.org/W2111394443","https://openalex.org/W2113798864","https://openalex.org/W2118382442","https://openalex.org/W2147423491","https://openalex.org/W2153185479","https://openalex.org/W2167639788","https://openalex.org/W2168093120","https://openalex.org/W2600258283","https://openalex.org/W2606963276","https://openalex.org/W4244894488","https://openalex.org/W4245987756","https://openalex.org/W4285719527","https://openalex.org/W6995434384"],"related_works":["https://openalex.org/W2566637483","https://openalex.org/W2127324789","https://openalex.org/W3024308452","https://openalex.org/W4244894488","https://openalex.org/W2040281526","https://openalex.org/W4285390450","https://openalex.org/W3010528205","https://openalex.org/W2979513934","https://openalex.org/W2090268225","https://openalex.org/W99277194"],"abstract_inverted_index":{"Array":[0],"indirection":[1,47,69,132],"causes":[2],"several":[3],"challenges":[4,29,58],"for":[5,32,52,88,150],"compilers":[6,41],"to":[7,30,36,70,83,116,157],"utilize":[8],"single":[9],"instruction,":[10],"multiple":[11],"data":[12,102,111],"(SIMD)":[13],"instructions.":[14],"Disjoint":[15],"memory":[16,20,72,92],"references,":[17,21],"arbitrarily":[18],"misaligned":[19],"and":[22,73,98],"dependence":[23],"cycles":[24],"in":[25,114],"loops":[26,44,51,89],"are":[27,135],"main":[28],"handle":[31],"SIMD":[33,40,53,86,126,148,160],"compilers.":[34],"Due":[35],"those":[37,57],"challenges,":[38],"existing":[39,159],"have":[42],"excluded":[43],"with":[45,153],"array":[46,68,131,154],"from":[48,137],"their":[49],"candidate":[50],"vectorization.":[54,127],"However,":[55],"addressing":[56],"is":[59],"inevitable,":[60],"since":[61],"many":[62],"important":[63],"compute-intensive":[64],"applications":[65],"extensively":[66],"use":[67],"reduce":[71],"computation":[74],"requirements.":[75],"In":[76],"this":[77],"work,":[78],"we":[79],"propose":[80],"a":[81],"method":[82,145,165],"generate":[84],"efficient":[85],"code":[87,113,149],"containing":[90],"indirected":[91],"references.":[93],"We":[94,107],"extract":[95],"both":[96],"inter-":[97],"intra-iteration":[99],"parallelism,":[100],"taking":[101],"reorganization":[103,112,119],"overhead":[104,120],"into":[105],"consideration.":[106],"also":[108],"optimally":[109],"place":[110],"order":[115],"amortize":[117],"the":[118,122,158,168],"through":[121],"performance":[123,169],"gain":[124],"of":[125,170],"Experiments":[128],"on":[129,175],"four":[130],"kernels,":[133],"which":[134],"extracted":[136],"real-world":[138],"scientific":[139],"applications,":[140],"show":[141],"that":[142],"our":[143,163],"proposed":[144,164],"effectively":[146],"generates":[147],"irregular":[151,171],"kernels":[152,172],"indirection.":[155],"Compared":[156],"vectorization":[161],"methods,":[162],"significantly":[166],"improves":[167],"by":[173],"91%,":[174],"average.":[176]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":9},{"year":2016,"cited_by_count":7},{"year":2015,"cited_by_count":8},{"year":2014,"cited_by_count":2},{"year":2013,"cited_by_count":4},{"year":2012,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
