{"id":"https://openalex.org/W4401454459","doi":"https://doi.org/10.1145/3677333.3678151","title":"Rewriting and Optimizing Vector Length Agnostic Intrinsics from Arm SVE to RVV","display_name":"Rewriting and Optimizing Vector Length Agnostic Intrinsics from Arm SVE to RVV","publication_year":2024,"publication_date":"2024-08-09","ids":{"openalex":"https://openalex.org/W4401454459","doi":"https://doi.org/10.1145/3677333.3678151"},"language":"en","primary_location":{"id":"doi:10.1145/3677333.3678151","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3677333.3678151","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3677333.3678151","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The 53rd International Conference on Parallel Processing Workshops","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3677333.3678151","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5104176522","display_name":"Jhih-Kuan Lin","orcid":null},"institutions":[{"id":"https://openalex.org/I25846049","display_name":"National Tsing Hua University","ror":"https://ror.org/00zdnkx70","country_code":"TW","type":"education","lineage":["https://openalex.org/I25846049"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Jhih-Kuan Lin","raw_affiliation_strings":["Department of Computer Science, National Tsing Hua University, Taiwan"],"raw_orcid":"https://orcid.org/0009-0001-9559-6059","affiliations":[{"raw_affiliation_string":"Department of Computer Science, National Tsing Hua University, Taiwan","institution_ids":["https://openalex.org/I25846049"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yu-Lun Yang","orcid":"https://orcid.org/0009-0001-7753-8613"},"institutions":[{"id":"https://openalex.org/I25846049","display_name":"National Tsing Hua University","ror":"https://ror.org/00zdnkx70","country_code":"TW","type":"education","lineage":["https://openalex.org/I25846049"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Yu-Lun Yang","raw_affiliation_strings":["Department of Computer Science, National Tsing Hua University, Taiwan"],"raw_orcid":"https://orcid.org/0009-0001-7753-8613","affiliations":[{"raw_affiliation_string":"Department of Computer Science, National Tsing Hua University, Taiwan","institution_ids":["https://openalex.org/I25846049"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065924524","display_name":"Hung-Ming Lai","orcid":"https://orcid.org/0000-0002-8623-0469"},"institutions":[{"id":"https://openalex.org/I25846049","display_name":"National Tsing Hua University","ror":"https://ror.org/00zdnkx70","country_code":"TW","type":"education","lineage":["https://openalex.org/I25846049"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Hung-Ming Lai","raw_affiliation_strings":["Department of Computer Science, National Tsing Hua University, Taiwan"],"raw_orcid":"https://orcid.org/0000-0002-8623-0469","affiliations":[{"raw_affiliation_string":"Department of Computer Science, National Tsing Hua University, Taiwan","institution_ids":["https://openalex.org/I25846049"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038803698","display_name":"Jenq\u2010Kuen Lee","orcid":"https://orcid.org/0000-0001-9919-6258"},"institutions":[{"id":"https://openalex.org/I25846049","display_name":"National Tsing Hua University","ror":"https://ror.org/00zdnkx70","country_code":"TW","type":"education","lineage":["https://openalex.org/I25846049"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Jenq-Kuen Lee","raw_affiliation_strings":["Department of Computer Science, National Tsing Hua University, Taiwan"],"raw_orcid":"https://orcid.org/0000-0001-9919-6258","affiliations":[{"raw_affiliation_string":"Department of Computer Science, National Tsing Hua University, Taiwan","institution_ids":["https://openalex.org/I25846049"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.7647,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.85457029,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"38","last_page":"47"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/intrinsics","display_name":"Intrinsics","score":0.9557878971099854},{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.8855162858963013},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8598475456237793},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7098106145858765},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.550564169883728},{"id":"https://openalex.org/keywords/vectorization","display_name":"Vectorization (mathematics)","score":0.5364312529563904},{"id":"https://openalex.org/keywords/rewriting","display_name":"Rewriting","score":0.5334774255752563},{"id":"https://openalex.org/keywords/operand","display_name":"Operand","score":0.5217300653457642},{"id":"https://openalex.org/keywords/porting","display_name":"Porting","score":0.5071642398834229},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.5033664107322693},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.4902839958667755},{"id":"https://openalex.org/keywords/software-portability","display_name":"Software portability","score":0.46302950382232666},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.1662352979183197},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.14395904541015625}],"concepts":[{"id":"https://openalex.org/C2908650547","wikidata":"https://www.wikidata.org/wiki/Q20999234","display_name":"Intrinsics","level":2,"score":0.9557878971099854},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.8855162858963013},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8598475456237793},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7098106145858765},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.550564169883728},{"id":"https://openalex.org/C41681595","wikidata":"https://www.wikidata.org/wiki/Q7917855","display_name":"Vectorization (mathematics)","level":2,"score":0.5364312529563904},{"id":"https://openalex.org/C154690210","wikidata":"https://www.wikidata.org/wiki/Q1668499","display_name":"Rewriting","level":2,"score":0.5334774255752563},{"id":"https://openalex.org/C55526617","wikidata":"https://www.wikidata.org/wiki/Q719375","display_name":"Operand","level":2,"score":0.5217300653457642},{"id":"https://openalex.org/C106251023","wikidata":"https://www.wikidata.org/wiki/Q851989","display_name":"Porting","level":3,"score":0.5071642398834229},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.5033664107322693},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.4902839958667755},{"id":"https://openalex.org/C63000827","wikidata":"https://www.wikidata.org/wiki/Q3080428","display_name":"Software portability","level":2,"score":0.46302950382232666},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.1662352979183197},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.14395904541015625}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3677333.3678151","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3677333.3678151","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3677333.3678151","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The 53rd International Conference on Parallel Processing Workshops","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3677333.3678151","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3677333.3678151","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3677333.3678151","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The 53rd International Conference on Parallel Processing Workshops","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.5,"display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4401454459.pdf"},"referenced_works_count":12,"referenced_works":["https://openalex.org/W1133087070","https://openalex.org/W1871378126","https://openalex.org/W2138843457","https://openalex.org/W2170499656","https://openalex.org/W2793513899","https://openalex.org/W2911593804","https://openalex.org/W3103937600","https://openalex.org/W3132840842","https://openalex.org/W4307922031","https://openalex.org/W4320067907","https://openalex.org/W4396531826","https://openalex.org/W4396680706"],"related_works":["https://openalex.org/W2566637483","https://openalex.org/W2127324789","https://openalex.org/W3024308452","https://openalex.org/W4244894488","https://openalex.org/W4289638474","https://openalex.org/W99277194","https://openalex.org/W2069504277","https://openalex.org/W4285390450","https://openalex.org/W2966127030","https://openalex.org/W1939907483"],"abstract_inverted_index":{"Advanced":[0],"processors":[1],"incorporate":[2],"SIMD":[3,16,53,122],"extensions":[4,17],"to":[5,130,182,206],"execute":[6],"data-parallel":[7],"operations":[8],"efficiently.":[9],"As":[10],"technology":[11],"advances,":[12],"new":[13,99],"generations":[14],"of":[15,101,210],"evolve":[18],"with":[19,33,47,58,103],"longer":[20],"vector":[21,34],"register":[22,49],"lengths,":[23],"making":[24],"target-specific":[25,59],"program":[26],"non-portable.":[27],"To":[28],"address":[29],"this":[30,108],"issue,":[31],"architectures":[32,95],"length":[35],"agnostic":[36],"(VLA)":[37],"programming":[38,57,172],"models":[39],"have":[40],"emerged,":[41],"which":[42,190],"can":[43],"scale":[44],"across":[45],"implementations":[46,185],"varying":[48],"lengths.":[50],"However,":[51],"utilizing":[52],"hardware":[54],"commonly":[55],"involves":[56],"intrinsics.":[60,180],"Despite":[61],"VLA\u2019s":[62],"scalability":[63],"within":[64],"the":[65,117,169,183,188,195],"same":[66],"ISA,":[67],"intrinsics":[68,119,129],"programs":[69],"designed":[70],"for":[71,93,125,140,152],"one":[72],"VLA":[73,84,94],"target":[74],"would":[75],"still":[76,200],"encounter":[77],"portability":[78],"issues":[79],"when":[80],"deployed":[81],"on":[82,116],"other":[83],"architectures.":[85],"Although":[86],"automatic":[87],"rewriting":[88,91,113,120],"techniques":[89],"exist,":[90],"strategies":[92,114],"are":[96,176,191],"a":[97],"relatively":[98],"area":[100],"research":[102],"limited":[104],"studies":[105],"available.":[106],"In":[107,157],"work,":[109],"we":[110,149,160],"present":[111],"our":[112,158,162,197],"based":[115],"open-source":[118],"library,":[121],"Everywhere":[123],"(SIMDe),":[124],"porting":[126],"Arm":[127,170],"SVE":[128,144,171,179],"RISC-V":[131],"Vector":[132],"Extension":[133],"(RVV).":[134],"Our":[135],"method":[136],"efficiently":[137],"transforms":[138],"masks":[139],"predicated":[141],"instructions":[142],"between":[143],"and":[145],"RVV":[146,199],"formats.":[147],"Additionally,":[148],"introduce":[150],"algorithms":[151],"removing":[153],"redundant":[154],"mask":[155],"computation.":[156],"experiment,":[159],"evaluate":[161],"approach":[163],"using":[164],"compute":[165],"kernels":[166],"collected":[167],"from":[168,204],"example":[173],"document":[174],"that":[175],"written":[177],"in":[178,187,208],"Compared":[181],"scalar":[184],"provided":[186],"document,":[189],"further":[192],"vectorized":[193],"by":[194],"compiler,":[196],"rewritten":[198],"achieves":[201],"speedup":[202],"ranging":[203],"1.39\u00d7":[205],"28.9\u00d7":[207],"terms":[209],"dynamic":[211],"instruction":[212],"count.":[213]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1}],"updated_date":"2026-06-12T08:23:45.883708","created_date":"2025-10-10T00:00:00"}
