{"id":"https://openalex.org/W4248437515","doi":"https://doi.org/10.1109/ipdps.2006.1639621","title":"Remove the memory wall: from performance modeling to architecture optimization","display_name":"Remove the memory wall: from performance modeling to architecture optimization","publication_year":2006,"publication_date":"2006-01-01","ids":{"openalex":"https://openalex.org/W4248437515","doi":"https://doi.org/10.1109/ipdps.2006.1639621"},"language":"en","primary_location":{"id":"doi:10.1109/ipdps.2006.1639621","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipdps.2006.1639621","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings 20th IEEE International Parallel &amp; Distributed Processing Symposium","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5057665558","display_name":"Xian\u2010He Sun","orcid":"https://orcid.org/0000-0002-1093-0792"},"institutions":[{"id":"https://openalex.org/I180949307","display_name":"Illinois Institute of Technology","ror":"https://ror.org/037t3ry66","country_code":"US","type":"education","lineage":["https://openalex.org/I180949307"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Xian-He Sun","raw_affiliation_strings":["Department of Computer Science, Illinois Institute of Technology, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Illinois Institute of Technology, USA","institution_ids":["https://openalex.org/I180949307"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5057665558"],"corresponding_institution_ids":["https://openalex.org/I180949307"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.49138911,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"2 pp.","last_page":"2 pp."},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8728708624839783},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.7115497589111328},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.5659880638122559},{"id":"https://openalex.org/keywords/data-access","display_name":"Data access","score":0.5406765937805176},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5208288431167603},{"id":"https://openalex.org/keywords/memory-architecture","display_name":"Memory architecture","score":0.47977834939956665},{"id":"https://openalex.org/keywords/cache-only-memory-architecture","display_name":"Cache-only memory architecture","score":0.47957533597946167},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.47952723503112793},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.41672247648239136},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.4150301218032837},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3808160722255707},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3432024121284485},{"id":"https://openalex.org/keywords/cpu-cache","display_name":"CPU cache","score":0.33166980743408203},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.3238959312438965},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.25516438484191895},{"id":"https://openalex.org/keywords/cache-coloring","display_name":"Cache coloring","score":0.19106504321098328},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.16310393810272217}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8728708624839783},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.7115497589111328},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.5659880638122559},{"id":"https://openalex.org/C47487241","wikidata":"https://www.wikidata.org/wiki/Q5227230","display_name":"Data access","level":2,"score":0.5406765937805176},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5208288431167603},{"id":"https://openalex.org/C2779602883","wikidata":"https://www.wikidata.org/wiki/Q15544750","display_name":"Memory architecture","level":2,"score":0.47977834939956665},{"id":"https://openalex.org/C3720319","wikidata":"https://www.wikidata.org/wiki/Q5015937","display_name":"Cache-only memory architecture","level":5,"score":0.47957533597946167},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.47952723503112793},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.41672247648239136},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.4150301218032837},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3808160722255707},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3432024121284485},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.33166980743408203},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3238959312438965},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.25516438484191895},{"id":"https://openalex.org/C201148951","wikidata":"https://www.wikidata.org/wiki/Q5015976","display_name":"Cache coloring","level":4,"score":0.19106504321098328},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.16310393810272217},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ipdps.2006.1639621","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipdps.2006.1639621","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings 20th IEEE International Parallel &amp; Distributed Processing Symposium","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W1525312648","https://openalex.org/W2078720616","https://openalex.org/W4249808101","https://openalex.org/W2543610639","https://openalex.org/W2142110652","https://openalex.org/W2144798271","https://openalex.org/W2011070474","https://openalex.org/W1975698617","https://openalex.org/W2161298819","https://openalex.org/W2360082868"],"abstract_inverted_index":{"Summary":[0],"form":[1],"only":[2,54],"given.":[3],"Data":[4],"access":[5,148,220],"is":[6,107,129,137,201],"a":[7,75,117,140,146,154],"known":[8],"bottleneck":[9,20],"of":[10,18,36,78,85,102,113,205,210,247],"high":[11,56],"performance":[12,23,211,248],"computing":[13],"(HPC).":[14],"The":[15,95],"prime":[16],"sources":[17],"this":[19],"are":[21,193,216],"the":[22,26,32,109,158,169,177,180,202,206,235,242],"gap":[24],"between":[25],"processor":[27],"and":[28,31,43,82,131,214,230,244],"memory":[29,34,41,97,161],"storage":[30],"large":[33,76],"requirements":[35],"ever-hungry":[37],"applications.":[38],"Although":[39],"advanced":[40],"hierarchies":[42],"parallel":[44],"file":[45,155],"systems":[46],"have":[47],"been":[48],"developed":[49],"in":[50,105,144,250],"recent":[51],"years,":[52],"they":[53],"provide":[55],"bandwidth":[57],"for":[58,65,80,121,188,195],"contiguous,":[59],"well-formed":[60],"data":[61,128,147,152,219,226],"streams,":[62],"performing":[63],"poorly":[64],"accessing":[66],"small,":[67],"noncontiguous":[68,83],"data.":[69],"Unfortunately,":[70],"many":[71],"HPC":[72],"applications":[73,191],"make":[74],"number":[77],"requests":[79],"small":[81],"pieces":[84],"data,":[86],"as":[87,93],"do":[88],"high-level":[89],"I/O":[90,119,125],"libraries":[91],"such":[92],"HDF-5.":[94],"problematic":[96],"wall":[98],"remains":[99],"after":[100],"years":[101],"study":[103],"and,":[104],"fact,":[106],"becoming":[108],"most":[110],"important":[111],"issue":[112],"HPC.":[114,122],"We":[115],"propose":[116],"new":[118,178],"architecture":[120,136,170,231],"Unlike":[123],"traditional":[124],"designs":[126],"where":[127],"stored":[130],"retrieved":[132],"by":[133],"request,":[134],"our":[135],"based":[138,167],"on":[139,168],"novel":[141],"\"server-push\"":[142,236],"model":[143],"which":[145],"server":[149,156],"proactively":[150],"pushes":[151],"from":[153],"to":[157,163,218,233],"compute":[159],"node's":[160],"or":[162],"it's":[164],"cache":[165,181,197],"directly":[166],"design.":[171],"Simulation":[172],"results":[173],"show":[174],"that":[175,192],"with":[176],"approach":[179],"hit":[182],"rates":[183],"increase":[184],"well":[185],"above":[186],"90%":[187],"various":[189],"benchmark":[190],"notorious":[194],"poor":[196],"performance.":[198],"Performance":[199],"evaluation":[200,249],"driven":[203],"force":[204],"push-based":[207],"model.":[208,237],"Mechanisms":[209],"modeling,":[212],"evaluation,":[213],"optimization":[215,232],"applied":[217],"pattern":[221],"identification,":[222],"prefetching":[223],"algorithm":[224],"design,":[225],"replacement":[227],"strategy":[228],"development,":[229],"enable":[234],"Our":[238],"current":[239],"success":[240],"illustrates":[241],"power":[243],"unique":[245],"role":[246],"computing.":[251]},"counts_by_year":[{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
