{"id":"https://openalex.org/W7127637469","doi":"https://doi.org/10.1145/3769002.3769969","title":"Adaptive Block Size Selection for Translating Triton Kernels to RVV","display_name":"Adaptive Block Size Selection for Translating Triton Kernels to RVV","publication_year":2025,"publication_date":"2025-11-16","ids":{"openalex":"https://openalex.org/W7127637469","doi":"https://doi.org/10.1145/3769002.3769969"},"language":null,"primary_location":{"id":"doi:10.1145/3769002.3769969","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3769002.3769969","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference on Research in Adaptive and Convergent Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100981067","display_name":"Liu Yuhao","orcid":null},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Liu Yuhao","raw_affiliation_strings":["Chinese University of Hong Kong, Shenzhen, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Chinese University of Hong Kong, Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I4210116924"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125055891","display_name":"William Kevin","orcid":null},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"William Kevin","raw_affiliation_strings":["Chinese University of Hong Kong, Shenzhen, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Chinese University of Hong Kong, Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I4210116924"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125061195","display_name":"Feige Zhou","orcid":null},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Feige Zhou","raw_affiliation_strings":["Chinese University of Hong Kong, Shenzhen, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Chinese University of Hong Kong, Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I4210116924"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112315978","display_name":"Yeh Ching Chung","orcid":null},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yeh-Ching Chung","raw_affiliation_strings":["Chinese University of Hong Kong, Shenzhen, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Chinese University of Hong Kong, Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I4210116924"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5000640012","display_name":"Wei\u2010Chung Hsu","orcid":"https://orcid.org/0000-0001-8583-8459"},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei-Chung Hsu","raw_affiliation_strings":["Chinese University of Hong Kong, Shenzhen, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Chinese University of Hong Kong, Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I4210116924"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100981067"],"corresponding_institution_ids":["https://openalex.org/I4210116924"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.79631024,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.5658000111579895,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.5658000111579895,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.24040000140666962,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.025200000032782555,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.5892999768257141},{"id":"https://openalex.org/keywords/register-allocation","display_name":"Register allocation","score":0.5713000297546387},{"id":"https://openalex.org/keywords/software-portability","display_name":"Software portability","score":0.5483999848365784},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.5482000112533569},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.48919999599456787},{"id":"https://openalex.org/keywords/succinctness","display_name":"Succinctness","score":0.4634000062942505},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4629000127315521},{"id":"https://openalex.org/keywords/enhanced-data-rates-for-gsm-evolution","display_name":"Enhanced Data Rates for GSM Evolution","score":0.4433000087738037}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7850000262260437},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5913000106811523},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.5892999768257141},{"id":"https://openalex.org/C128916667","wikidata":"https://www.wikidata.org/wiki/Q1343660","display_name":"Register allocation","level":3,"score":0.5713000297546387},{"id":"https://openalex.org/C63000827","wikidata":"https://www.wikidata.org/wiki/Q3080428","display_name":"Software portability","level":2,"score":0.5483999848365784},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.5482000112533569},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.48919999599456787},{"id":"https://openalex.org/C2776493592","wikidata":"https://www.wikidata.org/wiki/Q5158717","display_name":"Succinctness","level":2,"score":0.4634000062942505},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4629000127315521},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.4433000087738037},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.43959999084472656},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3540000021457672},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.3375000059604645},{"id":"https://openalex.org/C41431624","wikidata":"https://www.wikidata.org/wiki/Q1053357","display_name":"Block size","level":3,"score":0.33079999685287476},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.3296999931335449},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.32600000500679016},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.31540000438690186},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.2987000048160553},{"id":"https://openalex.org/C2780428219","wikidata":"https://www.wikidata.org/wiki/Q16952335","display_name":"Cover (algebra)","level":2,"score":0.29789999127388},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.29490000009536743},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.27970001101493835},{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.27469998598098755},{"id":"https://openalex.org/C117280010","wikidata":"https://www.wikidata.org/wiki/Q180944","display_name":"Register file","level":3,"score":0.25859999656677246},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.2581000030040741}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3769002.3769969","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3769002.3769969","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference on Research in Adaptive and Convergent Systems","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":7,"referenced_works":["https://openalex.org/W2604546911","https://openalex.org/W2954698171","https://openalex.org/W2960833983","https://openalex.org/W2971544482","https://openalex.org/W3211127556","https://openalex.org/W4282970339","https://openalex.org/W4408891426"],"related_works":[],"abstract_inverted_index":{"The":[0],"increasing":[1],"demand":[2],"for":[3,14,25,139,173],"efficient":[4,32],"AI":[5,34,178],"inference":[6],"on":[7,76,88,99,132],"edge":[8,47,177],"devices":[9,48],"has":[10],"intensified":[11],"the":[12,28,39,93,147,167],"need":[13],"high-performance":[15],"and":[16,103,117,125,153],"portable":[17],"programming":[18],"models.":[19],"Triton,":[20],"a":[21,61,84,109],"tile-based":[22],"language":[23],"designed":[24],"GPUs,":[26],"lowers":[27],"threshold":[29],"of":[30,42,64,96,170],"writing":[31],"custom":[33],"kernels.":[35],"This":[36,165],"paper":[37],"addresses":[38],"performance":[40,74,94,119,148,152,168],"portability":[41,169],"translating":[43],"Triton":[44,65,158,171],"kernels":[45,66,159,172],"to":[46,79,145],"such":[49],"as":[50],"RISC-V":[51],"CPUs":[52],"equipped":[53],"with":[54,67],"Vector":[55],"Extension":[56],"(RVV).":[57],"We":[58],"identify":[59,92],"that":[60],"direct":[62],"port":[63],"GPU-tuned":[68],"parameters,":[69],"BLOCK_SIZE,":[70],"results":[71],"in":[72,176],"significant":[73],"degradation":[75],"RVV":[77],"due":[78],"fundamental":[80],"architectural":[81],"differences.":[82],"Through":[83],"detailed":[85],"empirical":[86],"evaluation":[87],"several":[89],"benchmarks,":[90],"we":[91,135],"impact":[95],"tiling":[97],"parameters":[98],"vector":[100,122,128,154],"register":[101,123,129,155],"spilling":[102],"cache":[104,116,151],"performance.":[105],"Our":[106],"analysis":[107],"reveals":[108],"critical":[110],"trade-off:":[111],"some":[112,137],"BLOCK_SIZE":[113],"values":[114],"improve":[115],"prefetching":[118],"but":[120],"increase":[121],"pressures":[124],"cause":[126],"intense":[127],"spilling.":[130],"Based":[131],"these":[133],"insights,":[134],"develop":[136],"heuristics":[138],"selecting":[140],"an":[141],"adaptive":[142],"block":[143],"size":[144],"balance":[146],"trade-off":[149],"between":[150],"spilling,":[156],"mapping":[157],"more":[160],"efficiently":[161],"onto":[162],"RVV-based":[163],"systems.":[164],"enhances":[166],"heterogeneous":[174],"computing":[175],"inference.":[179]},"counts_by_year":[],"updated_date":"2026-02-07T06:11:34.122080","created_date":"2026-02-06T00:00:00"}
