{"id":"https://openalex.org/W7116274687","doi":"https://doi.org/10.1145/3750720.3757287","title":"Enhancing Superword-Level Parallelism on RISC-V through Custom Instruction Extensions","display_name":"Enhancing Superword-Level Parallelism on RISC-V through Custom Instruction Extensions","publication_year":2025,"publication_date":"2025-09-08","ids":{"openalex":"https://openalex.org/W7116274687","doi":"https://doi.org/10.1145/3750720.3757287"},"language":null,"primary_location":{"id":"doi:10.1145/3750720.3757287","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3750720.3757287","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Workshop Proceedings of the 54th International Conference on Parallel Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3750720.3757287","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5120920811","display_name":"Min-Hsiu Hsu","orcid":null},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Min-Hsiu Hsu","raw_affiliation_strings":["National Taiwan University, Taipei, Taiwan"],"affiliations":[{"raw_affiliation_string":"National Taiwan University, Taipei, Taiwan","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038791219","display_name":"Shih-Wei Liao","orcid":"https://orcid.org/0000-0001-5294-5274"},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Shih-wei Liao","raw_affiliation_strings":["National Taiwan University, Taipei, Taiwan"],"affiliations":[{"raw_affiliation_string":"National Taiwan University, Taipei, Taiwan","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120878726","display_name":"Chi-Hung Wei","orcid":null},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Chi-Hung Wei","raw_affiliation_strings":["National Taiwan University, Taipei, Taiwan"],"affiliations":[{"raw_affiliation_string":"National Taiwan University, Taipei, Taiwan","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5050513482","display_name":"Chi-Bang Kuan","orcid":null},"institutions":[{"id":"https://openalex.org/I4210148979","display_name":"MediaTek (Taiwan)","ror":"https://ror.org/05g9jck81","country_code":"TW","type":"company","lineage":["https://openalex.org/I4210148979"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Chi-Bang Kuan","raw_affiliation_strings":["Mediatek, Hsinchu, Taiwan"],"affiliations":[{"raw_affiliation_string":"Mediatek, Hsinchu, Taiwan","institution_ids":["https://openalex.org/I4210148979"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5120920811"],"corresponding_institution_ids":["https://openalex.org/I16733864"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.67306727,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"64","last_page":"70"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9075000286102295,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9075000286102295,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.021400000900030136,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10126","display_name":"Logic, programming, and type systems","score":0.016100000590085983,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spec#","display_name":"Spec#","score":0.7084000110626221},{"id":"https://openalex.org/keywords/suite","display_name":"Suite","score":0.6935999989509583},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.679099977016449},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6585999727249146},{"id":"https://openalex.org/keywords/offset","display_name":"Offset (computer science)","score":0.5993000268936157},{"id":"https://openalex.org/keywords/instruction-level-parallelism","display_name":"Instruction-level parallelism","score":0.5875999927520752},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.5386999845504761},{"id":"https://openalex.org/keywords/vectorization","display_name":"Vectorization (mathematics)","score":0.504800021648407},{"id":"https://openalex.org/keywords/vector-processor","display_name":"Vector processor","score":0.36500000953674316}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8435999751091003},{"id":"https://openalex.org/C2778565505","wikidata":"https://www.wikidata.org/wiki/Q2207566","display_name":"Spec#","level":2,"score":0.7084000110626221},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.6935999989509583},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.679099977016449},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6662999987602234},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6585999727249146},{"id":"https://openalex.org/C175291020","wikidata":"https://www.wikidata.org/wiki/Q1156822","display_name":"Offset (computer science)","level":2,"score":0.5993000268936157},{"id":"https://openalex.org/C140763907","wikidata":"https://www.wikidata.org/wiki/Q2714055","display_name":"Instruction-level parallelism","level":3,"score":0.5875999927520752},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.5386999845504761},{"id":"https://openalex.org/C41681595","wikidata":"https://www.wikidata.org/wiki/Q7917855","display_name":"Vectorization (mathematics)","level":2,"score":0.504800021648407},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.36640000343322754},{"id":"https://openalex.org/C161824985","wikidata":"https://www.wikidata.org/wiki/Q919509","display_name":"Vector processor","level":2,"score":0.36500000953674316},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.362199991941452},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.352400004863739},{"id":"https://openalex.org/C61483411","wikidata":"https://www.wikidata.org/wiki/Q3124522","display_name":"Data parallelism","level":3,"score":0.35040000081062317},{"id":"https://openalex.org/C76782552","wikidata":"https://www.wikidata.org/wiki/Q110546","display_name":"Just-in-time compilation","level":3,"score":0.3479999899864197},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3425000011920929},{"id":"https://openalex.org/C200833197","wikidata":"https://www.wikidata.org/wiki/Q333707","display_name":"Compile time","level":3,"score":0.31450000405311584},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3102000057697296},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.3084000051021576},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.3046000003814697},{"id":"https://openalex.org/C2778770139","wikidata":"https://www.wikidata.org/wiki/Q1966904","display_name":"Solver","level":2,"score":0.3041999936103821},{"id":"https://openalex.org/C42992933","wikidata":"https://www.wikidata.org/wiki/Q691169","display_name":"Task parallelism","level":3,"score":0.301800012588501},{"id":"https://openalex.org/C2989134064","wikidata":"https://www.wikidata.org/wiki/Q288510","display_name":"Execution time","level":2,"score":0.2782000005245209},{"id":"https://openalex.org/C139571649","wikidata":"https://www.wikidata.org/wiki/Q1156793","display_name":"Program optimization","level":3,"score":0.26739999651908875},{"id":"https://openalex.org/C164833996","wikidata":"https://www.wikidata.org/wiki/Q2323839","display_name":"Automatic parallelization","level":3,"score":0.25929999351501465},{"id":"https://openalex.org/C8767382","wikidata":"https://www.wikidata.org/wiki/Q1058454","display_name":"Dynamic compilation","level":3,"score":0.2572999894618988},{"id":"https://openalex.org/C76752949","wikidata":"https://www.wikidata.org/wiki/Q7607499","display_name":"Stencil","level":2,"score":0.25450000166893005},{"id":"https://openalex.org/C2776372474","wikidata":"https://www.wikidata.org/wiki/Q508291","display_name":"Simplicity","level":2,"score":0.251800000667572}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3750720.3757287","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3750720.3757287","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Workshop Proceedings of the 54th International Conference on Parallel Processing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3750720.3757287","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3750720.3757287","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Workshop Proceedings of the 54th International Conference on Parallel Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.635860800743103,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":6,"referenced_works":["https://openalex.org/W2090268225","https://openalex.org/W2111394443","https://openalex.org/W2127324789","https://openalex.org/W2479978724","https://openalex.org/W2613264175","https://openalex.org/W4307079523"],"related_works":[],"abstract_inverted_index":{"RISC-V":[0,62],"has":[1],"gained":[2],"widespread":[3],"attention":[4],"for":[5,15,30],"its":[6,20],"simplicity":[7],"and":[8,76,99],"extensibility,":[9],"making":[10],"it":[11],"a":[12],"compelling":[13],"choice":[14],"high-performance":[16],"processor":[17],"design.":[18],"However,":[19],"vector":[21,54],"extension,":[22],"RVV":[23],"(RISC-V":[24],"Vector":[25],"Extension),":[26],"lacks":[27],"efficient":[28],"support":[29,78],"immediate":[31,58],"offset":[32],"addressing,":[33],"which":[34],"limits":[35],"the":[36,61,66,87],"effectiveness":[37],"of":[38],"Superword-Level":[39],"Parallelism":[40],"(SLP)":[41],"optimization":[42],"in":[43],"certain":[44],"scenarios.":[45],"This":[46],"research":[47],"addresses":[48],"that":[49,93],"limitation":[50],"by":[51],"introducing":[52],"custom":[53,82],"load/store":[55],"instructions":[56],"with":[57,70],"offsets":[59],"to":[60,79],"ISA.":[63],"We":[64],"extend":[65],"LLVM":[67],"compiler":[68],"infrastructure":[69],"modified":[71],"TTI":[72],"hooks,":[73],"backend":[74],"TableGen,":[75],"QEMU":[77],"enable":[80],"these":[81],"instructions.":[83],"Experimental":[84],"results":[85],"on":[86],"SPEC":[88],"CPU2006":[89],"benchmark":[90],"suite":[91],"show":[92],"our":[94],"approach":[95],"reduces":[96],"vectorization":[97],"costs":[98],"improves":[100],"SLP":[101],"coverage,":[102],"yielding":[103],"notable":[104],"performance":[105],"gains.":[106]},"counts_by_year":[],"updated_date":"2025-12-21T02:06:08.432651","created_date":"2025-12-21T00:00:00"}
