{"id":"https://openalex.org/W2330672121","doi":"https://doi.org/10.1109/hpca.2016.7446077","title":"Towards high performance paged memory for GPUs","display_name":"Towards high performance paged memory for GPUs","publication_year":2016,"publication_date":"2016-03-01","ids":{"openalex":"https://openalex.org/W2330672121","doi":"https://doi.org/10.1109/hpca.2016.7446077","mag":"2330672121"},"language":"en","primary_location":{"id":"doi:10.1109/hpca.2016.7446077","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca.2016.7446077","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080335497","display_name":"Tianhao Zheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Tianhao Zheng","raw_affiliation_strings":["NVIDIA"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031781240","display_name":"David Nellans","orcid":"https://orcid.org/0000-0001-5203-8367"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"David Nellans","raw_affiliation_strings":["NVIDIA"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065112065","display_name":"Arslan Zulfiqar","orcid":"https://orcid.org/0009-0003-6240-5900"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Arslan Zulfiqar","raw_affiliation_strings":["NVIDIA"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015391591","display_name":"Mark W. Stephenson","orcid":"https://orcid.org/0000-0002-1350-0165"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mark Stephenson","raw_affiliation_strings":["NVIDIA"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112796474","display_name":"Stephen W. Keckler","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Stephen W. Keckler","raw_affiliation_strings":["NVIDIA"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5080335497"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":15.745,"has_fulltext":false,"cited_by_count":120,"citation_normalized_percentile":{"value":0.99558174,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"345","last_page":"357"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.873222827911377},{"id":"https://openalex.org/keywords/programmer","display_name":"Programmer","score":0.643455982208252},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6380024552345276},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.5962983965873718},{"id":"https://openalex.org/keywords/memory-hierarchy","display_name":"Memory hierarchy","score":0.5085865259170532},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.46352845430374146},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.4625261425971985},{"id":"https://openalex.org/keywords/memory-management","display_name":"Memory management","score":0.4451165795326233},{"id":"https://openalex.org/keywords/uniform-memory-access","display_name":"Uniform memory access","score":0.4416508972644806},{"id":"https://openalex.org/keywords/pci-express","display_name":"PCI Express","score":0.4415631890296936},{"id":"https://openalex.org/keywords/shared-memory","display_name":"Shared memory","score":0.41298067569732666},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.35337743163108826},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.2688681185245514},{"id":"https://openalex.org/keywords/semiconductor-memory","display_name":"Semiconductor memory","score":0.17803379893302917},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.09871086478233337}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.873222827911377},{"id":"https://openalex.org/C2778514511","wikidata":"https://www.wikidata.org/wiki/Q1374194","display_name":"Programmer","level":2,"score":0.643455982208252},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6380024552345276},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.5962983965873718},{"id":"https://openalex.org/C2778100165","wikidata":"https://www.wikidata.org/wiki/Q1589327","display_name":"Memory hierarchy","level":3,"score":0.5085865259170532},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.46352845430374146},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.4625261425971985},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.4451165795326233},{"id":"https://openalex.org/C51290061","wikidata":"https://www.wikidata.org/wiki/Q1936765","display_name":"Uniform memory access","level":4,"score":0.4416508972644806},{"id":"https://openalex.org/C64270927","wikidata":"https://www.wikidata.org/wiki/Q206924","display_name":"PCI Express","level":3,"score":0.4415631890296936},{"id":"https://openalex.org/C133875982","wikidata":"https://www.wikidata.org/wiki/Q764810","display_name":"Shared memory","level":2,"score":0.41298067569732666},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.35337743163108826},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.2688681185245514},{"id":"https://openalex.org/C98986596","wikidata":"https://www.wikidata.org/wiki/Q1143031","display_name":"Semiconductor memory","level":2,"score":0.17803379893302917},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.09871086478233337}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpca.2016.7446077","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca.2016.7446077","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":59,"referenced_works":["https://openalex.org/W48345810","https://openalex.org/W1564523715","https://openalex.org/W1979527452","https://openalex.org/W1979717209","https://openalex.org/W1985539519","https://openalex.org/W1990962327","https://openalex.org/W1994023216","https://openalex.org/W2001148043","https://openalex.org/W2003838150","https://openalex.org/W2020733012","https://openalex.org/W2029618796","https://openalex.org/W2029764709","https://openalex.org/W2037291093","https://openalex.org/W2041679393","https://openalex.org/W2047390994","https://openalex.org/W2059290792","https://openalex.org/W2062430565","https://openalex.org/W2078081736","https://openalex.org/W2080592089","https://openalex.org/W2082000934","https://openalex.org/W2095954861","https://openalex.org/W2095957868","https://openalex.org/W2097773861","https://openalex.org/W2098278566","https://openalex.org/W2100415730","https://openalex.org/W2100926301","https://openalex.org/W2102843684","https://openalex.org/W2104305170","https://openalex.org/W2106342588","https://openalex.org/W2106579652","https://openalex.org/W2119473230","https://openalex.org/W2123815799","https://openalex.org/W2140455011","https://openalex.org/W2147926533","https://openalex.org/W2150143212","https://openalex.org/W2152659795","https://openalex.org/W2157802978","https://openalex.org/W2164645802","https://openalex.org/W2166620913","https://openalex.org/W2168214303","https://openalex.org/W2169665207","https://openalex.org/W2170666978","https://openalex.org/W2175443198","https://openalex.org/W2237817213","https://openalex.org/W2273440736","https://openalex.org/W2337228275","https://openalex.org/W3112651258","https://openalex.org/W3138617740","https://openalex.org/W4238398050","https://openalex.org/W4240262711","https://openalex.org/W4250511340","https://openalex.org/W4253494365","https://openalex.org/W6601941235","https://openalex.org/W6633941944","https://openalex.org/W6645075276","https://openalex.org/W6684730004","https://openalex.org/W6690040212","https://openalex.org/W6694513646","https://openalex.org/W6703177460"],"related_works":["https://openalex.org/W2026512611","https://openalex.org/W3151393245","https://openalex.org/W2594155508","https://openalex.org/W2545380179","https://openalex.org/W2353073543","https://openalex.org/W2811273125","https://openalex.org/W87257424","https://openalex.org/W79990711","https://openalex.org/W3012471169","https://openalex.org/W2070652215"],"abstract_inverted_index":{"Despite":[0],"industrial":[1],"investment":[2],"in":[3,58,80,123,135],"both":[4],"on-die":[5],"GPUs":[6,27],"and":[7,56,81,90,110,138,221],"next":[8],"generation":[9],"interconnects,":[10],"the":[11,49,54,61,84,97,132,142,157,169,194],"highest":[12],"performing":[13],"parallel":[14],"accelerators":[15],"shipping":[16],"today":[17],"continue":[18],"to":[19,47,60,115,140,166,187,209,215],"be":[20],"discrete":[21],"GPUs.":[22],"Connected":[23],"via":[24],"PCIe,":[25],"these":[26,118],"utilize":[28],"their":[29,217],"own":[30],"privately":[31],"managed":[32],"physical":[33],"memory":[34,64,79,99,103,148,153,199],"that":[35,76,95,197],"is":[36,164],"optimized":[37],"for":[38],"high":[39,108],"bandwidth.":[40],"These":[41],"separate":[42],"memories":[43],"force":[44],"GPU":[45,63,70,85,98,116,136,146,158,173,198,222],"programmers":[46],"manage":[48],"movement":[50],"of":[51,83,145,172],"data":[52],"between":[53],"CPU":[55],"GPU,":[57],"addition":[59],"on-chip":[62],"hierarchy.":[65],"To":[66],"simplify":[67],"this":[68,102,128],"process,":[69],"vendors":[71],"are":[72],"developing":[73],"software":[74,119,139],"runtimes":[75,120],"automatically":[77],"page":[78],"out":[82],"on-demand,":[86],"reducing":[87],"programmer":[88,151,188],"effort":[89],"enabling":[91,206],"computation":[92],"across":[93,219],"datasets":[94,218],"exceed":[96],"capacity.":[100],"Because":[101],"migration":[104],"occurs":[105],"over":[106],"a":[107,182],"latency":[109],"low":[111],"bandwidth":[112],"link":[113],"(compared":[114],"memory),":[117],"may":[121],"result":[122],"significant":[124],"performance":[125,143,170,195],"penalties.":[126],"In":[127],"work,":[129],"we":[130,161,192],"explore":[131],"features":[133],"needed":[134],"hardware":[137],"close":[141],"gap":[144],"paged":[147,174],"versus":[149],"legacy":[150],"directed":[152,189],"management.":[154],"Without":[155],"modifying":[156],"execution":[159],"pipeline,":[160],"show":[162],"it":[163],"possible":[165],"largely":[167],"hide":[168],"overheads":[171],"memory,":[175],"converting":[176],"an":[177],"average":[178],"2\u00d7":[179],"slowdown":[180],"into":[181],"12%":[183],"speedup":[184],"when":[185],"compared":[186],"transfers.":[190],"Additionally,":[191],"examine":[193],"impact":[196],"oversubscription":[200],"has":[201],"on":[202,213],"application":[203,207],"run":[204],"times,":[205],"designers":[208],"make":[210],"informed":[211],"decisions":[212],"how":[214],"shard":[216],"hosts":[220],"instances.":[223]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":9},{"year":2023,"cited_by_count":12},{"year":2022,"cited_by_count":8},{"year":2021,"cited_by_count":11},{"year":2020,"cited_by_count":22},{"year":2019,"cited_by_count":17},{"year":2018,"cited_by_count":17},{"year":2017,"cited_by_count":13},{"year":2016,"cited_by_count":2}],"updated_date":"2026-03-06T13:50:29.536080","created_date":"2025-10-10T00:00:00"}
