{"id":"https://openalex.org/W4411066161","doi":"https://doi.org/10.1145/3743136","title":"In-SRAM Parallel Data Shuffle","display_name":"In-SRAM Parallel Data Shuffle","publication_year":2025,"publication_date":"2025-06-05","ids":{"openalex":"https://openalex.org/W4411066161","doi":"https://doi.org/10.1145/3743136"},"language":"en","primary_location":{"id":"doi:10.1145/3743136","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3743136","pdf_url":null,"source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1145/3743136","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5110181818","display_name":"Cuiyuan Jia","orcid":"https://orcid.org/0009-0003-8054-9131"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chaoyang Jia","raw_affiliation_strings":["National University of Defense Technology","National University of Defense Technology,  Changsha, China"],"raw_orcid":"https://orcid.org/0009-0003-8054-9131","affiliations":[{"raw_affiliation_string":"National University of Defense Technology","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"National University of Defense Technology,  Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025499211","display_name":"Dunbo Zhang","orcid":"https://orcid.org/0009-0001-8854-4752"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhang Dunbo","raw_affiliation_strings":["National University of Defense Technology","National University of Defense Technology,  Changsha China"],"raw_orcid":"https://orcid.org/0000-0003-2384-5359","affiliations":[{"raw_affiliation_string":"National University of Defense Technology","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"National University of Defense Technology,  Changsha China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093216369","display_name":"Qingjie Lang","orcid":"https://orcid.org/0009-0009-9456-8695"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingjie Lang","raw_affiliation_strings":["National University of Defense Technology","National University of Defense Technology,  Changsha China"],"raw_orcid":"https://orcid.org/0009-0009-9456-8695","affiliations":[{"raw_affiliation_string":"National University of Defense Technology","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"National University of Defense Technology,  Changsha China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101902622","display_name":"Ruoxi Wang","orcid":"https://orcid.org/0009-0009-5154-0086"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruoxi Wang","raw_affiliation_strings":["National University of Defense Technology","National University of Defense Technology,  Changsha China"],"raw_orcid":"https://orcid.org/0009-0009-5154-0086","affiliations":[{"raw_affiliation_string":"National University of Defense Technology","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"National University of Defense Technology,  Changsha China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101790620","display_name":"Li Shen","orcid":"https://orcid.org/0000-0001-9043-2998"},"institutions":[{"id":"https://openalex.org/I111149068","display_name":"National Defense University","ror":"https://ror.org/01nqk4x38","country_code":"US","type":"education","lineage":["https://openalex.org/I111149068"]},{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Li Shen","raw_affiliation_strings":["Department of Computing Science, National University of Defense Technology","Department of Computing Science, National University of Defense Technology, Changsha China"],"raw_orcid":"https://orcid.org/0000-0001-9043-2998","affiliations":[{"raw_affiliation_string":"Department of Computing Science, National University of Defense Technology","institution_ids":["https://openalex.org/I170215575","https://openalex.org/I111149068"]},{"raw_affiliation_string":"Department of Computing Science, National University of Defense Technology, Changsha China","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5110181818"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":1.8633,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.82832399,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":"22","issue":"3","first_page":"1","last_page":"24"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11032","display_name":"VLSI and Analog Circuit Testing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11032","display_name":"VLSI and Analog Circuit Testing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.778400182723999},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6205180883407593},{"id":"https://openalex.org/keywords/static-random-access-memory","display_name":"Static random-access memory","score":0.4319089651107788},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.3423053026199341},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.1587265133857727}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.778400182723999},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6205180883407593},{"id":"https://openalex.org/C68043766","wikidata":"https://www.wikidata.org/wiki/Q267416","display_name":"Static random-access memory","level":2,"score":0.4319089651107788},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.3423053026199341},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.1587265133857727}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3743136","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3743136","pdf_url":null,"source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3743136","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3743136","pdf_url":null,"source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3185965449","display_name":null,"funder_award_id":"62472436","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1530262073","https://openalex.org/W1986704057","https://openalex.org/W2012252449","https://openalex.org/W2029644811","https://openalex.org/W2034147186","https://openalex.org/W2079436606","https://openalex.org/W2109196467","https://openalex.org/W2167399819","https://openalex.org/W2291920536","https://openalex.org/W2514035975","https://openalex.org/W2593332841","https://openalex.org/W2613264175","https://openalex.org/W2613569094","https://openalex.org/W2794288888","https://openalex.org/W2946048183","https://openalex.org/W2946705112","https://openalex.org/W2976137532","https://openalex.org/W2982219368","https://openalex.org/W3015546205","https://openalex.org/W3017262157","https://openalex.org/W3081113604","https://openalex.org/W3123542955","https://openalex.org/W3128246765","https://openalex.org/W3147109109","https://openalex.org/W3191906639","https://openalex.org/W3217629644","https://openalex.org/W4233429846","https://openalex.org/W4238404680","https://openalex.org/W4388463752"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W4392590355","https://openalex.org/W3151633427","https://openalex.org/W2212894501","https://openalex.org/W1979375376","https://openalex.org/W2793465010","https://openalex.org/W3024050170","https://openalex.org/W1976168335"],"abstract_inverted_index":{"While":[0],"Single":[1],"Instruction":[2],"Multiple":[3],"Data":[4],"(SIMD)":[5],"units":[6],"are":[7],"widely":[8],"employed":[9],"in":[10,55,82],"processors":[11],"for":[12,139,146],"neural":[13],"networks,":[14],"signal":[15],"processing,":[16],"and":[17,40,110,144],"high-performance":[18],"computing,":[19],"they":[20],"suffer":[21],"from":[22],"expensive":[23],"shuffle":[24,32,60,80,123,154],"operations":[25,33],"dedicated":[26],"to":[27,79],"data":[28,39,62,77,81,86,107,140,147],"alignment.":[29],"In":[30],"fact,":[31],"only":[34,142,177],"change":[35],"the":[36,89,130,162],"layout":[37,108],"of":[38,93,103,120,137],"ideally":[41],"should":[42],"be":[43,96],"done":[44],"entirely":[45],"within":[46],"memory.":[47],"To":[48],"this":[49,56],"end,":[50],"we":[51],"propose":[52],"Shuffle":[53,113,131,160,175],"SRAM":[54,66,94,114,132,176],"article,":[57],"which":[58],"can":[59,95,133,166],"multiple":[61],"elements":[63,87],"simultaneously":[64],"across":[65],"banks.":[67],"The":[68],"key":[69],"idea":[70],"is":[71],"exploiting":[72],"inter-bank":[73],"word":[74,91],"line":[75,92],"wise":[76],"movement":[78],"parallel,":[83],"where":[84],"all":[85],"on":[88,156],"same":[90],"shuffled":[97],"simultaneously,":[98],"achieving":[99],"a":[100,117],"high":[101],"level":[102],"parallelism.":[104],"Through":[105],"suitable":[106],"preparation":[109],"proper":[111],"control,":[112],"efficiently":[115],"supports":[116],"wide":[118],"range":[119],"commonly":[121],"used":[122],"operations.":[124],"Our":[125],"evaluation":[126],"results":[127],"show":[128],"that":[129],"reap":[134],"performance":[135],"benefits":[136],"14.3\u00d7":[138],"reorganization":[141,148],"applications":[143,151],"1.97\u00d7":[145],"+":[149],"computation":[150],"over":[152],"conventional":[153],"architecture":[155],"general-purpose":[157],"processors.":[158],"With":[159],"SRAM,":[161,174],"state-of-the-art":[163],"vector":[164],"processor":[165],"obtain":[167],"2.58\u00d7":[168],"energy":[169],"efficiency.":[170],"Compared":[171],"with":[172],"traditional":[173],"increases":[178],"3.5%":[179],"additional":[180],"area":[181],"overhead.":[182]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-05-30T09:04:40.226872","created_date":"2025-10-10T00:00:00"}
