{"id":"https://openalex.org/W4253306330","doi":"https://doi.org/10.1109/micro.2016.7783717","title":"Cache-emulated register file: An integrated on-chip memory architecture for high performance GPGPUs","display_name":"Cache-emulated register file: An integrated on-chip memory architecture for high performance GPGPUs","publication_year":2016,"publication_date":"2016-10-01","ids":{"openalex":"https://openalex.org/W4253306330","doi":"https://doi.org/10.1109/micro.2016.7783717"},"language":"en","primary_location":{"id":"doi:10.1109/micro.2016.7783717","is_oa":false,"landing_page_url":"https://doi.org/10.1109/micro.2016.7783717","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 49th Annual IEEE/ACM International Symposium on Microarchitecture (MICRO)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5045693138","display_name":"Naifeng Jing","orcid":"https://orcid.org/0000-0001-8417-5796"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Naifeng Jing","raw_affiliation_strings":["Advanced Computer Architecture Laboratory, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Advanced Computer Architecture Laboratory, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100652312","display_name":"Jianfei Wang","orcid":"https://orcid.org/0000-0003-0942-518X"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianfei Wang","raw_affiliation_strings":["Advanced Computer Architecture Laboratory, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Advanced Computer Architecture Laboratory, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019903740","display_name":"Fengfeng Fan","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fengfeng Fan","raw_affiliation_strings":["Advanced Computer Architecture Laboratory, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Advanced Computer Architecture Laboratory, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081196646","display_name":"Wenkang Yu","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenkang Yu","raw_affiliation_strings":["Advanced Computer Architecture Laboratory, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Advanced Computer Architecture Laboratory, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053801300","display_name":"Li Jiang","orcid":"https://orcid.org/0000-0002-7353-8798"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Li Jiang","raw_affiliation_strings":["Advanced Computer Architecture Laboratory, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Advanced Computer Architecture Laboratory, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100323095","display_name":"Chao Li","orcid":"https://orcid.org/0000-0001-6218-4659"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chao Li","raw_affiliation_strings":["Advanced Computer Architecture Laboratory, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Advanced Computer Architecture Laboratory, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056632010","display_name":"Xiaoyao Liang","orcid":"https://orcid.org/0000-0002-2790-5884"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoyao Liang","raw_affiliation_strings":["Advanced Computer Architecture Laboratory, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Advanced Computer Architecture Laboratory, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5045693138"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":2.02472646,"has_fulltext":false,"cited_by_count":17,"citation_normalized_percentile":{"value":0.89073306,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"12"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8606423139572144},{"id":"https://openalex.org/keywords/register-file","display_name":"Register file","score":0.659684419631958},{"id":"https://openalex.org/keywords/memory-hierarchy","display_name":"Memory hierarchy","score":0.5714733004570007},{"id":"https://openalex.org/keywords/cache-only-memory-architecture","display_name":"Cache-only memory architecture","score":0.5651794672012329},{"id":"https://openalex.org/keywords/non-uniform-memory-access","display_name":"Non-uniform memory access","score":0.560259222984314},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.5440998077392578},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5222495198249817},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.49265652894973755},{"id":"https://openalex.org/keywords/memory-architecture","display_name":"Memory architecture","score":0.485840767621994},{"id":"https://openalex.org/keywords/uniform-memory-access","display_name":"Uniform memory access","score":0.4284706711769104},{"id":"https://openalex.org/keywords/cpu-cache","display_name":"CPU cache","score":0.3948928415775299},{"id":"https://openalex.org/keywords/memory-management","display_name":"Memory management","score":0.374931275844574},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3597245216369629},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.3523556590080261},{"id":"https://openalex.org/keywords/cache-coloring","display_name":"Cache coloring","score":0.32899266481399536},{"id":"https://openalex.org/keywords/instruction-set","display_name":"Instruction set","score":0.30174610018730164},{"id":"https://openalex.org/keywords/semiconductor-memory","display_name":"Semiconductor memory","score":0.2114788293838501}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8606423139572144},{"id":"https://openalex.org/C117280010","wikidata":"https://www.wikidata.org/wiki/Q180944","display_name":"Register file","level":3,"score":0.659684419631958},{"id":"https://openalex.org/C2778100165","wikidata":"https://www.wikidata.org/wiki/Q1589327","display_name":"Memory hierarchy","level":3,"score":0.5714733004570007},{"id":"https://openalex.org/C3720319","wikidata":"https://www.wikidata.org/wiki/Q5015937","display_name":"Cache-only memory architecture","level":5,"score":0.5651794672012329},{"id":"https://openalex.org/C133371097","wikidata":"https://www.wikidata.org/wiki/Q868014","display_name":"Non-uniform memory access","level":5,"score":0.560259222984314},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.5440998077392578},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5222495198249817},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.49265652894973755},{"id":"https://openalex.org/C2779602883","wikidata":"https://www.wikidata.org/wiki/Q15544750","display_name":"Memory architecture","level":2,"score":0.485840767621994},{"id":"https://openalex.org/C51290061","wikidata":"https://www.wikidata.org/wiki/Q1936765","display_name":"Uniform memory access","level":4,"score":0.4284706711769104},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.3948928415775299},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.374931275844574},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3597245216369629},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.3523556590080261},{"id":"https://openalex.org/C201148951","wikidata":"https://www.wikidata.org/wiki/Q5015976","display_name":"Cache coloring","level":4,"score":0.32899266481399536},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.30174610018730164},{"id":"https://openalex.org/C98986596","wikidata":"https://www.wikidata.org/wiki/Q1143031","display_name":"Semiconductor memory","level":2,"score":0.2114788293838501}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/micro.2016.7783717","is_oa":false,"landing_page_url":"https://doi.org/10.1109/micro.2016.7783717","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 49th Annual IEEE/ACM International Symposium on Microarchitecture (MICRO)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.8999999761581421,"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W1678569174","https://openalex.org/W1979527452","https://openalex.org/W1980364632","https://openalex.org/W1982996921","https://openalex.org/W1991518265","https://openalex.org/W2020572638","https://openalex.org/W2022820501","https://openalex.org/W2027806965","https://openalex.org/W2033486618","https://openalex.org/W2034639175","https://openalex.org/W2043083835","https://openalex.org/W2047060659","https://openalex.org/W2059301531","https://openalex.org/W2067441262","https://openalex.org/W2072656003","https://openalex.org/W2072873664","https://openalex.org/W2080592089","https://openalex.org/W2081583983","https://openalex.org/W2084110734","https://openalex.org/W2090584832","https://openalex.org/W2096661534","https://openalex.org/W2110195531","https://openalex.org/W2118513230","https://openalex.org/W2128120785","https://openalex.org/W2160428323","https://openalex.org/W2224192221","https://openalex.org/W2238992335","https://openalex.org/W2502410587","https://openalex.org/W4212894338","https://openalex.org/W4254988766","https://openalex.org/W6637235533","https://openalex.org/W6645179522","https://openalex.org/W6689000625"],"related_works":["https://openalex.org/W1598433531","https://openalex.org/W1974211070","https://openalex.org/W2047684617","https://openalex.org/W2096506606","https://openalex.org/W2145484885","https://openalex.org/W1998129367","https://openalex.org/W2790267391","https://openalex.org/W1965563745","https://openalex.org/W2737923277","https://openalex.org/W2168550483"],"abstract_inverted_index":{"The":[0],"on-chip":[1,33,68,130],"memory":[2,21,34,62,69,131],"design":[3,64],"is":[4,36,44],"critical":[5],"to":[6,47],"the":[7,14,18,31,39,48,60,67,75,88,95,145],"GPGPU":[8,85],"performance":[9,84,124,142],"because":[10],"it":[11,135],"serves":[12],"between":[13],"massive":[15],"threads":[16],"and":[17,25,43,65,94,107,125],"huge":[19],"external":[20],"as":[22],"a":[23,104,108,111],"low-latency":[24],"high-throughput":[26],"data":[27],"communication":[28],"point.":[29],"However,":[30],"existing":[32],"hierarchy":[35,63],"inherited":[37],"from":[38,91],"conventional":[40],"CPU":[41],"architecture":[42,73,100,120],"oftentimes":[45],"sub-optimal":[46],"SIMT":[49],"(single":[50],"instruction,":[51],"multiple":[52],"threads)":[53],"execution.":[54],"In":[55],"this":[56,98,118],"study,":[57],"we":[58],"surpass":[59],"traditional":[61],"reform":[66],"into":[70],"an":[71,138],"integrated":[72,99],"with":[74,128],"cache-emulated":[76],"register":[77],"file":[78],"(RF)":[79],"capability":[80],"tailored":[81],"for":[82,144],"high":[83],"computing.":[86],"With":[87],"lightweight":[89],"support":[90],"ISA,":[92],"compiler":[93],"modified":[96],"microarchitecture,":[97],"can":[101,121,136],"dynamically":[102],"emulate":[103],"variable-sized":[105],"RF":[106],"cache":[109],"in":[110],"uniform":[112],"way.":[113],"Evaluation":[114],"results":[115],"demonstrate":[116],"that":[117],"novel":[119],"deliver":[122],"better":[123],"energy":[126],"efficiency":[127],"smaller":[129],"size.":[132],"For":[133],"example,":[134],"gain":[137],"average":[139],"of":[140],"50%":[141],"improvement":[143],"cache-sensitive":[146],"applications.":[147]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
