{"id":"https://openalex.org/W3102249078","doi":"https://doi.org/10.1109/tcsi.2020.3031895","title":"Exploring Applications of STT-RAM in GPU Architectures","display_name":"Exploring Applications of STT-RAM in GPU Architectures","publication_year":2020,"publication_date":"2020-11-16","ids":{"openalex":"https://openalex.org/W3102249078","doi":"https://doi.org/10.1109/tcsi.2020.3031895","mag":"3102249078"},"language":"en","primary_location":{"id":"doi:10.1109/tcsi.2020.3031895","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsi.2020.3031895","pdf_url":null,"source":{"id":"https://openalex.org/S116977442","display_name":"IEEE Transactions on Circuits and Systems I Regular Papers","issn_l":"1549-8328","issn":["1549-8328","1558-0806"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems I: Regular Papers","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100338879","display_name":"Xiaoxiao Liu","orcid":"https://orcid.org/0000-0001-6325-1796"},"institutions":[{"id":"https://openalex.org/I4210137977","display_name":"Advanced Micro Devices (United States)","ror":"https://ror.org/04kd6c783","country_code":"US","type":"company","lineage":["https://openalex.org/I4210137977"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Xiaoxiao Liu","raw_affiliation_strings":["AMD Inc., Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"AMD Inc., Santa Clara, CA, USA","institution_ids":["https://openalex.org/I4210137977"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005196528","display_name":"Mengjie Mao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mengjie Mao","raw_affiliation_strings":["Waymo LLC, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Waymo LLC, Mountain View, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066400911","display_name":"Xiuyuan Bi","orcid":"https://orcid.org/0000-0002-7401-6764"},"institutions":[{"id":"https://openalex.org/I2801368622","display_name":"Sesame Workshop","ror":"https://ror.org/04h2yy041","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I2801368622"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiuyuan Bi","raw_affiliation_strings":["Black Sesame Technologies, Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"Black Sesame Technologies, Santa Clara, CA, USA","institution_ids":["https://openalex.org/I2801368622"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100429403","display_name":"Hai Li","orcid":"https://orcid.org/0000-0003-3228-6544"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hai Li","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Duke University, Durham, NC, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Duke University, Durham, NC, USA","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058073627","display_name":"Yiran Chen","orcid":"https://orcid.org/0000-0002-1486-8412"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yiran Chen","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Duke University, Durham, NC, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Duke University, Durham, NC, USA","institution_ids":["https://openalex.org/I170897317"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100338879"],"corresponding_institution_ids":["https://openalex.org/I4210137977"],"apc_list":null,"apc_paid":null,"fwci":1.3862,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.81315331,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"68","issue":"1","first_page":"238","last_page":"249"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10363","display_name":"Low-power high-performance VLSI design","score":0.992900013923645,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8130338191986084},{"id":"https://openalex.org/keywords/register-file","display_name":"Register file","score":0.588283360004425},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.5736958980560303},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5678752660751343},{"id":"https://openalex.org/keywords/static-random-access-memory","display_name":"Static random-access memory","score":0.5293339490890503},{"id":"https://openalex.org/keywords/shared-memory","display_name":"Shared memory","score":0.47729337215423584},{"id":"https://openalex.org/keywords/magnetoresistive-random-access-memory","display_name":"Magnetoresistive random-access memory","score":0.4591178297996521},{"id":"https://openalex.org/keywords/row","display_name":"Row","score":0.4323379099369049},{"id":"https://openalex.org/keywords/chip","display_name":"Chip","score":0.4232061207294464},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.38856568932533264},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.37745529413223267},{"id":"https://openalex.org/keywords/instruction-set","display_name":"Instruction set","score":0.2650189697742462},{"id":"https://openalex.org/keywords/random-access-memory","display_name":"Random access memory","score":0.18136921525001526},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.13116353750228882}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8130338191986084},{"id":"https://openalex.org/C117280010","wikidata":"https://www.wikidata.org/wiki/Q180944","display_name":"Register file","level":3,"score":0.588283360004425},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.5736958980560303},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5678752660751343},{"id":"https://openalex.org/C68043766","wikidata":"https://www.wikidata.org/wiki/Q267416","display_name":"Static random-access memory","level":2,"score":0.5293339490890503},{"id":"https://openalex.org/C133875982","wikidata":"https://www.wikidata.org/wiki/Q764810","display_name":"Shared memory","level":2,"score":0.47729337215423584},{"id":"https://openalex.org/C46891859","wikidata":"https://www.wikidata.org/wiki/Q1061546","display_name":"Magnetoresistive random-access memory","level":3,"score":0.4591178297996521},{"id":"https://openalex.org/C135598885","wikidata":"https://www.wikidata.org/wiki/Q1366302","display_name":"Row","level":2,"score":0.4323379099369049},{"id":"https://openalex.org/C165005293","wikidata":"https://www.wikidata.org/wiki/Q1074500","display_name":"Chip","level":2,"score":0.4232061207294464},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.38856568932533264},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.37745529413223267},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.2650189697742462},{"id":"https://openalex.org/C2994168587","wikidata":"https://www.wikidata.org/wiki/Q5295","display_name":"Random access memory","level":2,"score":0.18136921525001526},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.13116353750228882},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsi.2020.3031895","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsi.2020.3031895","pdf_url":null,"source":{"id":"https://openalex.org/S116977442","display_name":"IEEE Transactions on Circuits and Systems I Regular Papers","issn_l":"1549-8328","issn":["1549-8328","1558-0806"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems I: Regular Papers","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","score":0.8999999761581421,"display_name":"Affordable and clean energy"}],"awards":[{"id":"https://openalex.org/G1413169137","display_name":null,"funder_award_id":"1955246","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3677370297","display_name":null,"funder_award_id":"1910299","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4789755018","display_name":null,"funder_award_id":"1725456","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8940334949","display_name":null,"funder_award_id":"DE-SC0018064","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W1482635212","https://openalex.org/W1976534780","https://openalex.org/W1979527452","https://openalex.org/W1980034734","https://openalex.org/W1980364632","https://openalex.org/W1985330137","https://openalex.org/W1992851788","https://openalex.org/W2010202670","https://openalex.org/W2012025286","https://openalex.org/W2022632182","https://openalex.org/W2034639175","https://openalex.org/W2043465878","https://openalex.org/W2053331999","https://openalex.org/W2059301531","https://openalex.org/W2069345435","https://openalex.org/W2072656003","https://openalex.org/W2080592089","https://openalex.org/W2081373884","https://openalex.org/W2084110734","https://openalex.org/W2093043622","https://openalex.org/W2101717804","https://openalex.org/W2105768042","https://openalex.org/W2106738517","https://openalex.org/W2116826022","https://openalex.org/W2120246901","https://openalex.org/W2139052244","https://openalex.org/W2203111322","https://openalex.org/W2273440736","https://openalex.org/W2403204284","https://openalex.org/W2543205889","https://openalex.org/W2547755356","https://openalex.org/W2598176114","https://openalex.org/W2941172511","https://openalex.org/W3143249011","https://openalex.org/W4238440425","https://openalex.org/W6645179522","https://openalex.org/W6694513646","https://openalex.org/W6734885788"],"related_works":["https://openalex.org/W1594563977","https://openalex.org/W1569109055","https://openalex.org/W1532146045","https://openalex.org/W154693229","https://openalex.org/W2622060226","https://openalex.org/W1579918296","https://openalex.org/W4256298930","https://openalex.org/W1589529336","https://openalex.org/W2371858336","https://openalex.org/W2350590545"],"abstract_inverted_index":{"Use":[0],"of":[1,15,33,79,95,148,157,168,171,176,222,231,257,280,297,304,314],"modern":[2,23],"GPUs":[3],"has":[4,132],"been":[5],"extended":[6],"from":[7],"traditional":[8],"3D":[9],"graphic":[10],"processing":[11],"to":[12,30,46,66,104,116,126,145,213,252,261,292,342,351],"computing":[13],"acceleration":[14],"many":[16],"scientific,":[17],"engineering,":[18],"and":[19,58,90,120,154,173,190,204,318,336,359],"enterprise":[20],"applications.":[21],"In":[22,99],"GPUs,":[24,80],"on-chip":[25],"memory":[26,62,77,87,122,160,192,201,300,306,349],"capacity":[27,78,241],"keeps":[28],"increasing":[29],"support":[31],"thousands":[32],"chip-resident":[34],"threads.":[35],"For":[36],"example,":[37],"a":[38,59,133,177,182,196,239,248],"large":[39,60,86],"register":[40,118,188,263,267,325],"file":[41,119,189,326],"is":[42,63,82,210,242,270,289],"needed":[43],"in":[44,51,123,166,187,265],"order":[45],"efficiently":[47],"process":[48],"highly-parallel":[49],"threads":[50,72],"single":[52],"instruction":[53],"multiple":[54],"thread":[55],"(SIMT)":[56],"fashion,":[57],"shared":[61,121,191,299,348],"often":[64],"implemented":[65,159],"allow":[67],"data":[68,184,209],"sharing":[69],"among":[70],"the":[71,74,106,146,152,158,200,205,214,219,223,227,232,254,258,266,273,278,281,294,298,302,305],"on":[73,312,322],"chip.":[75],"On-chip":[76],"however,":[81],"highly":[83],"constrained":[84],"by":[85,272],"cell":[88,109,136],"area":[89,137],"high":[91],"static":[92],"power":[93,156],"consumption":[94],"conventional":[96,343],"SRAM":[97],"implementation.":[98],"this":[100],"work,":[101],"we":[102,180],"propose":[103,181],"utilize":[105],"emerging":[107],"multi-level":[108],"(MLC)":[110],"spin-transfer":[111],"torque":[112],"RAM":[113],"(STT-RAM)":[114],"technology":[115,288],"implement":[117],"GPUs.":[124],"Compared":[125],"SRAM,":[127],"MLC":[128],"STT-RAM":[129],"(or":[130],"MLC-STT)":[131],"much":[134],"smaller":[135],"as":[138,140],"well":[139],"ultra-low":[141],"standby":[142],"power,":[143],"thanks":[144],"non-volatility":[147],"MLC-STT":[149,178,224,324,347],"technology.":[150],"Hence,":[151],"footprint":[153],"leakage":[155],"components":[161],"are":[162,235],"substantially":[163],"reduced.":[164],"Moreover,":[165],"light":[167],"asymmetric":[169],"performance":[170,331,354],"soft":[172,220],"hard":[174,233],"bits":[175,221,234,303],"cell,":[179],"dynamic":[183],"remapping":[185],"strategy":[186],"implementations":[193],"that":[194,321],"allows":[195],"flexible":[197],"tradeoff":[198],"between":[199],"access":[202,262],"time":[203,256],"available":[206],"capacity:":[207],"frequently-accessed":[208],"always":[211],"mapped":[212],"fast":[215],"rows":[216,229],"built":[217],"with":[218],"cells":[225],"while":[226],"slow":[228],"composed":[230],"used":[236],"only":[237],"when":[238],"larger":[240],"critically":[243],"needed.":[244],"We":[245],"also":[246,290],"develop":[247],"novel":[249],"rescheduling":[250],"scheme":[251],"minimize":[253],"waiting":[255],"issued":[259,282],"warps":[260],"banks":[264],"file,":[268],"which":[269],"induced":[271],"long":[274],"writeback":[275],"operations":[276],"through":[277],"reordering":[279],"warps.":[283],"Finally,":[284],"an":[285],"early":[286],"termination":[287],"applied":[291],"save":[293],"write":[295],"energy":[296,334,338,357,361],"if":[301],"do":[307],"not":[308],"flip.":[309],"Experimental":[310],"results":[311],"benchmarks":[313],"ISPASS2009,":[315],"Rodinia,":[316],"Parboil,":[317],"CUDA":[319],"show":[320],"average,":[323],"can":[327],"achieve":[328],"3.28%":[329],"system":[330,353],"improvement,":[332,355],"9.48%":[333],"reduction,":[335,358],"38.9%":[337],"efficiency":[339,362],"improvement":[340],"compared":[341],"SRAM-based":[344],"design.":[345],"Meanwhile,":[346],"leads":[350],"3.45%":[352],"49.3%":[356],"116%":[360],"improvement.":[363]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
