{"id":"https://openalex.org/W4417403974","doi":"https://doi.org/10.1109/pact65351.2025.00022","title":"EARTH: Efficient Architecture for RISC-V Vector Memory Access","display_name":"EARTH: Efficient Architecture for RISC-V Vector Memory Access","publication_year":2025,"publication_date":"2025-11-03","ids":{"openalex":"https://openalex.org/W4417403974","doi":"https://doi.org/10.1109/pact65351.2025.00022"},"language":null,"primary_location":{"id":"doi:10.1109/pact65351.2025.00022","is_oa":false,"landing_page_url":"https://doi.org/10.1109/pact65351.2025.00022","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 34th International Conference on Parallel Architectures and Compilation Techniques (PACT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056093953","display_name":"Hong Guan","orcid":"https://orcid.org/0000-0001-5053-5052"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hongyi Guan","raw_affiliation_strings":["Tsinghua University,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028680646","display_name":"Yichuan Gao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yichuan Gao","raw_affiliation_strings":["Intel Labs China,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Intel Labs China,Beijing,China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051851357","display_name":"Chenlu Miao","orcid":"https://orcid.org/0009-0005-8233-1528"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chenlu Miao","raw_affiliation_strings":["Intel Labs China,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Intel Labs China,Beijing,China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023537734","display_name":"Haoyang Wu","orcid":"https://orcid.org/0000-0002-0644-7554"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Haoyang Wu","raw_affiliation_strings":["Intel Labs China,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Intel Labs China,Beijing,China","institution_ids":[]}]},{"author_position":"middle","author":{"id":null,"display_name":"Hang Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hang Zhu","raw_affiliation_strings":["Independent Researcher"],"affiliations":[{"raw_affiliation_string":"Independent Researcher","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073708689","display_name":"M Lin","orcid":null},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingfeng Lin","raw_affiliation_strings":["Shenzhen University,Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"Shenzhen University,Shenzhen,China","institution_ids":["https://openalex.org/I180726961"]}]},{"author_position":"last","author":{"id":null,"display_name":"Huayue Liang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huayue Liang","raw_affiliation_strings":["Intel Labs China,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Intel Labs China,Beijing,China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5056093953"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":2.1258,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.90358098,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"122","last_page":"134"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.6791999936103821,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.6791999936103821,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.14100000262260437,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.05550000071525574,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/physical-address","display_name":"Physical address","score":0.5083000063896179},{"id":"https://openalex.org/keywords/memory-management","display_name":"Memory management","score":0.47429999709129333},{"id":"https://openalex.org/keywords/virtual-memory","display_name":"Virtual memory","score":0.4293000102043152},{"id":"https://openalex.org/keywords/byte","display_name":"Byte","score":0.4083999991416931},{"id":"https://openalex.org/keywords/data-access","display_name":"Data access","score":0.353300005197525},{"id":"https://openalex.org/keywords/transposition","display_name":"Transposition (logic)","score":0.3515999913215637},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.32260000705718994},{"id":"https://openalex.org/keywords/memory-map","display_name":"Memory map","score":0.31700000166893005},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.305400013923645}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7289999723434448},{"id":"https://openalex.org/C41036726","wikidata":"https://www.wikidata.org/wiki/Q844824","display_name":"Physical address","level":3,"score":0.5083000063896179},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.5078999996185303},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.47429999709129333},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4683000147342682},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.45339998602867126},{"id":"https://openalex.org/C76399640","wikidata":"https://www.wikidata.org/wiki/Q189401","display_name":"Virtual memory","level":4,"score":0.4293000102043152},{"id":"https://openalex.org/C43364308","wikidata":"https://www.wikidata.org/wiki/Q8799","display_name":"Byte","level":2,"score":0.4083999991416931},{"id":"https://openalex.org/C47487241","wikidata":"https://www.wikidata.org/wiki/Q5227230","display_name":"Data access","level":2,"score":0.353300005197525},{"id":"https://openalex.org/C12455157","wikidata":"https://www.wikidata.org/wiki/Q7835331","display_name":"Transposition (logic)","level":2,"score":0.3515999913215637},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.32260000705718994},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.3199999928474426},{"id":"https://openalex.org/C74426580","wikidata":"https://www.wikidata.org/wiki/Q719484","display_name":"Memory map","level":3,"score":0.31700000166893005},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.305400013923645},{"id":"https://openalex.org/C2779602883","wikidata":"https://www.wikidata.org/wiki/Q15544750","display_name":"Memory architecture","level":2,"score":0.3010999858379364},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.29829999804496765},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.29739999771118164},{"id":"https://openalex.org/C63511323","wikidata":"https://www.wikidata.org/wiki/Q908936","display_name":"Interleaved memory","level":4,"score":0.2849999964237213},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.2827000021934509},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.27889999747276306},{"id":"https://openalex.org/C93446704","wikidata":"https://www.wikidata.org/wiki/Q449328","display_name":"Registered memory","level":3,"score":0.2705000042915344},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.2667999863624573},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.26660001277923584},{"id":"https://openalex.org/C198082294","wikidata":"https://www.wikidata.org/wiki/Q3399648","display_name":"Position (finance)","level":2,"score":0.2648000121116638},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.2635999917984009},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.26159998774528503},{"id":"https://openalex.org/C2781357197","wikidata":"https://www.wikidata.org/wiki/Q5757597","display_name":"High memory","level":2,"score":0.2540999948978424},{"id":"https://openalex.org/C98986596","wikidata":"https://www.wikidata.org/wiki/Q1143031","display_name":"Semiconductor memory","level":2,"score":0.2531000077724457},{"id":"https://openalex.org/C194739806","wikidata":"https://www.wikidata.org/wiki/Q66221","display_name":"Computer data storage","level":2,"score":0.25200000405311584}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/pact65351.2025.00022","is_oa":false,"landing_page_url":"https://doi.org/10.1109/pact65351.2025.00022","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 34th International Conference on Parallel Architectures and Compilation Techniques (PACT)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W1983394510","https://openalex.org/W2094969361","https://openalex.org/W2144481293","https://openalex.org/W2613264175","https://openalex.org/W2899396210","https://openalex.org/W2994193159","https://openalex.org/W3158044478","https://openalex.org/W4291972732","https://openalex.org/W4307079523","https://openalex.org/W4366341968","https://openalex.org/W4378807770","https://openalex.org/W4379116055","https://openalex.org/W4401568823","https://openalex.org/W4402475780","https://openalex.org/W4415309624"],"related_works":[],"abstract_inverted_index":{"Vector":[0],"processors":[1],"frequently":[2],"suffer":[3],"from":[4],"inefficient":[5],"memory":[6,13,48,109,151,219,264],"accesses,":[7,122],"particularly":[8],"for":[9,96,128,178,217],"strided":[10,18,121,132,218,236],"and":[11,130,152,255,273],"segment":[12,64,162,180,243],"access":[14,110,265],"patterns.":[15],"While":[16],"coalescing":[17,135],"accesses":[19,137,220],"is":[20],"a":[21,33,94,106,166],"natural":[22],"solution,":[23],"effectively":[24],"gathering":[25,129],"or":[26,49,56,80],"scattering":[27,131],"elements":[28],"at":[29,188],"fixed":[30],"strides":[31],"remains":[32],"significant":[34],"challenge.":[35],"Naive":[36],"approaches":[37],"typically":[38],"rely":[39],"on":[40,197,203],"high-overhead":[41],"crossbars":[42],"that":[43,170,213],"remap":[44],"any":[45,52],"byte":[46],"in":[47,54,225,232],"registers":[50,55,153],"to":[51,59,113,222,246],"position":[53],"memory,":[57],"leading":[58],"physical":[60],"design":[61,196],"issues.":[62],"Meanwhile,":[63],"operations":[65],"require":[66],"row-column":[67],"transpositions,":[68],"which":[69],"are":[70,91],"often":[71],"handled":[72],"using":[73],"either":[74],"element-level":[75],"in-place":[76,185],"transposition":[77,84,187],"(degrading":[78],"performance)":[79],"large":[81],"buffer-based":[82],"bulk":[83,186],"(incurring":[85],"high":[86],"area":[87,252],"overhead).":[88],"Both":[89],"options":[90],"undesirable,":[92],"highlighting":[93],"need":[95,177],"more":[97],"efficient":[98],"solutions.":[99],"In":[100],"this":[101],"paper,":[102],"we":[103],"present":[104],"EARTH,":[105],"novel":[107],"vector":[108,207,276],"architecture":[111,239],"designed":[112],"overcome":[114],"these":[115,262],"challenges":[116],"through":[117,154],"shifting-based":[118],"optimizations.":[119],"For":[120,161],"EARTH":[123,164,195,214,249,267],"integrates":[124],"specialized":[125],"shift":[126],"networks":[127],"elements.":[133],"After":[134],"multiple":[136],"into":[138],"one":[139],"request":[140],"within":[141],"the":[142,155,176,193,271],"same":[143],"cache":[144],"line,":[145],"data":[146],"can":[147],"be":[148],"routed":[149],"between":[150],"shifting":[156],"network":[157],"with":[158,199],"minimal":[159],"overhead.":[160,190],"operations,":[163],"employs":[165],"shifted":[167],"register":[168],"bank":[169],"enables":[171],"direct":[172],"column-wise":[173],"access,":[174],"eliminating":[175],"dedicated":[179],"buffers":[181],"while":[182],"providing":[183],"highperformance,":[184],"acceptable":[189],"We":[191],"implemented":[192],"entire":[194],"FPGA":[198],"Chisel":[200],"HDL":[201],"based":[202],"an":[204],"open-source":[205],"RISC-V":[206],"unit":[208],"Saturn.":[209],"Our":[210],"evaluation":[211],"demonstrates":[212],"enhances":[215],"performance":[216,272],"proportionally":[221],"their":[223],"prevalence":[224],"workloads,":[226],"achieving":[227],"$\\mathbf{4":[228],"x}-\\mathbf{8":[229],"x}$":[230],"speedups":[231],"benchmarks":[233],"dominated":[234],"by":[235,253,258],"operations.":[237],"The":[238],"also":[240],"delivers":[241],"area-efficient":[242],"handling.":[244],"Compared":[245],"conventional":[247],"designs,":[248],"reducing":[250],"hardware":[251],"9%":[254],"power":[256],"consumption":[257],"41%.":[259],"By":[260],"optimizing":[261],"necessary":[263],"patterns,":[266],"significantly":[268],"advances":[269],"both":[270],"efficiency":[274],"of":[275],"processors.":[277]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-12-16T00:00:00"}
