{"id":"https://openalex.org/W2768065515","doi":"https://doi.org/10.1145/3126908.3126931","title":"Exploring and analyzing the real impact of modern on-package memory on HPC scientific kernels","display_name":"Exploring and analyzing the real impact of modern on-package memory on HPC scientific kernels","publication_year":2017,"publication_date":"2017-11-08","ids":{"openalex":"https://openalex.org/W2768065515","doi":"https://doi.org/10.1145/3126908.3126931","mag":"2768065515"},"language":"en","primary_location":{"id":"doi:10.1145/3126908.3126931","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3126908.3126931","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100413657","display_name":"Ang Li","orcid":"https://orcid.org/0000-0003-3734-9137"},"institutions":[{"id":"https://openalex.org/I142606810","display_name":"Pacific Northwest National Laboratory","ror":"https://ror.org/05h992307","country_code":"US","type":"facility","lineage":["https://openalex.org/I1325736334","https://openalex.org/I1330989302","https://openalex.org/I142606810","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Ang Li","raw_affiliation_strings":["Pacific Northwest National Lab"],"affiliations":[{"raw_affiliation_string":"Pacific Northwest National Lab","institution_ids":["https://openalex.org/I142606810"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100444152","display_name":"Weifeng Liu","orcid":"https://orcid.org/0000-0002-2150-5759"},"institutions":[{"id":"https://openalex.org/I124055696","display_name":"University of Copenhagen","ror":"https://ror.org/035b05819","country_code":"DK","type":"education","lineage":["https://openalex.org/I124055696"]},{"id":"https://openalex.org/I204778367","display_name":"Norwegian University of Science and Technology","ror":"https://ror.org/05xg72x27","country_code":"NO","type":"education","lineage":["https://openalex.org/I204778367"]}],"countries":["DK","NO"],"is_corresponding":false,"raw_author_name":"Weifeng Liu","raw_affiliation_strings":["University of Copenhagen, Denmark and Norwegian University of Science and Technology, Norway"],"affiliations":[{"raw_affiliation_string":"University of Copenhagen, Denmark and Norwegian University of Science and Technology, Norway","institution_ids":["https://openalex.org/I204778367","https://openalex.org/I124055696"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051145997","display_name":"Mads Ruben Burgdorff Kristensen","orcid":"https://orcid.org/0000-0001-6079-7742"},"institutions":[{"id":"https://openalex.org/I124055696","display_name":"University of Copenhagen","ror":"https://ror.org/035b05819","country_code":"DK","type":"education","lineage":["https://openalex.org/I124055696"]}],"countries":["DK"],"is_corresponding":false,"raw_author_name":"Mads R. B. Kristensen","raw_affiliation_strings":["University of Copenhagen, Denmark"],"affiliations":[{"raw_affiliation_string":"University of Copenhagen, Denmark","institution_ids":["https://openalex.org/I124055696"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007385908","display_name":"Brian Vinter","orcid":"https://orcid.org/0000-0002-3947-9878"},"institutions":[{"id":"https://openalex.org/I124055696","display_name":"University of Copenhagen","ror":"https://ror.org/035b05819","country_code":"DK","type":"education","lineage":["https://openalex.org/I124055696"]}],"countries":["DK"],"is_corresponding":false,"raw_author_name":"Brian Vinter","raw_affiliation_strings":["University of Copenhagen, Denmark"],"affiliations":[{"raw_affiliation_string":"University of Copenhagen, Denmark","institution_ids":["https://openalex.org/I124055696"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100446064","display_name":"Hao Wang","orcid":"https://orcid.org/0000-0001-9301-5989"},"institutions":[{"id":"https://openalex.org/I859038795","display_name":"Virginia Tech","ror":"https://ror.org/02smfhw86","country_code":"US","type":"education","lineage":["https://openalex.org/I859038795"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hao Wang","raw_affiliation_strings":["Virginia Tech"],"affiliations":[{"raw_affiliation_string":"Virginia Tech","institution_ids":["https://openalex.org/I859038795"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047871879","display_name":"Kaixi Hou","orcid":"https://orcid.org/0000-0003-3921-6709"},"institutions":[{"id":"https://openalex.org/I859038795","display_name":"Virginia Tech","ror":"https://ror.org/02smfhw86","country_code":"US","type":"education","lineage":["https://openalex.org/I859038795"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kaixi Hou","raw_affiliation_strings":["Virginia Tech"],"affiliations":[{"raw_affiliation_string":"Virginia Tech","institution_ids":["https://openalex.org/I859038795"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102811479","display_name":"Andr\u00e9s M\u00e1rquez","orcid":"https://orcid.org/0000-0002-4313-1882"},"institutions":[{"id":"https://openalex.org/I142606810","display_name":"Pacific Northwest National Laboratory","ror":"https://ror.org/05h992307","country_code":"US","type":"facility","lineage":["https://openalex.org/I1325736334","https://openalex.org/I1330989302","https://openalex.org/I142606810","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Andres Marquez","raw_affiliation_strings":["Pacific Northwest National Lab"],"affiliations":[{"raw_affiliation_string":"Pacific Northwest National Lab","institution_ids":["https://openalex.org/I142606810"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043209884","display_name":"Shuaiwen Leon Song","orcid":"https://orcid.org/0000-0002-8402-1436"},"institutions":[{"id":"https://openalex.org/I16285277","display_name":"William & Mary","ror":"https://ror.org/03hsf0573","country_code":"US","type":"education","lineage":["https://openalex.org/I16285277"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shuaiwen Leon Song","raw_affiliation_strings":["Pacific Northwest National Lab and College of William and Mary"],"affiliations":[{"raw_affiliation_string":"Pacific Northwest National Lab and College of William and Mary","institution_ids":["https://openalex.org/I16285277"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5100413657"],"corresponding_institution_ids":["https://openalex.org/I142606810"],"apc_list":null,"apc_paid":null,"fwci":9.0337,"has_fulltext":false,"cited_by_count":48,"citation_normalized_percentile":{"value":0.98644578,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"14"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8049668073654175},{"id":"https://openalex.org/keywords/dram","display_name":"Dram","score":0.7201103568077087},{"id":"https://openalex.org/keywords/memory-hierarchy","display_name":"Memory hierarchy","score":0.6238713264465332},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.6211522817611694},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5641334056854248},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.49837207794189453},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.448262095451355},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.4232296347618103},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.38062429428100586},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.18122950196266174}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8049668073654175},{"id":"https://openalex.org/C7366592","wikidata":"https://www.wikidata.org/wiki/Q1255620","display_name":"Dram","level":2,"score":0.7201103568077087},{"id":"https://openalex.org/C2778100165","wikidata":"https://www.wikidata.org/wiki/Q1589327","display_name":"Memory hierarchy","level":3,"score":0.6238713264465332},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.6211522817611694},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5641334056854248},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.49837207794189453},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.448262095451355},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.4232296347618103},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.38062429428100586},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.18122950196266174},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3126908.3126931","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3126908.3126931","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.atira.dk:openaire_cris_publications/57f47103-b79a-4f4b-8246-e0db6aeec9bf","is_oa":false,"landing_page_url":"https://researchprofiles.ku.dk/da/publications/57f47103-b79a-4f4b-8246-e0db6aeec9bf","pdf_url":null,"source":{"id":"https://openalex.org/S4306401983","display_name":"Research at the University of Copenhagen (University of Copenhagen)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I124055696","host_organization_name":"University of Copenhagen","host_organization_lineage":["https://openalex.org/I124055696"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Li , A , Liu , W , Kristensen , M RB , Vinter , B , Wang , H , Hou , K , Marquez , A & Song , S L 2017 , Exploring and analyzing the real impact of modern on-package memory on HPC scientific kernels . in Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, SC . vol. 17 , Denver, USA . https://doi.org/10.1145/3126908.3126931","raw_type":"bookPart"},{"id":"pmh:oai:pure.atira.dk:publications/57f47103-b79a-4f4b-8246-e0db6aeec9bf","is_oa":false,"landing_page_url":"https://curis.ku.dk/portal/da/publications/exploring-and-analyzing-the-real-impact-of-modern-onpackage-memory-on-hpc-scientific-kernels(57f47103-b79a-4f4b-8246-e0db6aeec9bf).html","pdf_url":null,"source":{"id":"https://openalex.org/S4306401983","display_name":"Research at the University of Copenhagen (University of Copenhagen)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I124055696","host_organization_name":"University of Copenhagen","host_organization_lineage":["https://openalex.org/I124055696"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Li , A , Liu , W , Kristensen , M RB , Vinter , B , Wang , H , Hou , K , Marquez , A & Song , S L 2017 , Exploring and analyzing the real impact of modern on-package memory on HPC scientific kernels . in Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, SC . vol. 17 , Denver, USA . https://doi.org/10.1145/3126908.3126931","raw_type":"bookPart"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":53,"referenced_works":["https://openalex.org/W312067013","https://openalex.org/W348433680","https://openalex.org/W993511226","https://openalex.org/W1491237044","https://openalex.org/W1940446526","https://openalex.org/W1977443758","https://openalex.org/W1977661221","https://openalex.org/W1986989509","https://openalex.org/W1989608846","https://openalex.org/W2002555321","https://openalex.org/W2009654791","https://openalex.org/W2016279572","https://openalex.org/W2035080386","https://openalex.org/W2038355886","https://openalex.org/W2054716548","https://openalex.org/W2067354926","https://openalex.org/W2081526957","https://openalex.org/W2090032487","https://openalex.org/W2097538613","https://openalex.org/W2100618668","https://openalex.org/W2102182691","https://openalex.org/W2103498248","https://openalex.org/W2103817093","https://openalex.org/W2105524676","https://openalex.org/W2115052535","https://openalex.org/W2124480634","https://openalex.org/W2124853018","https://openalex.org/W2128022558","https://openalex.org/W2146451305","https://openalex.org/W2157587823","https://openalex.org/W2161091390","https://openalex.org/W2168931017","https://openalex.org/W2253855427","https://openalex.org/W2269813923","https://openalex.org/W2276719940","https://openalex.org/W2294693415","https://openalex.org/W2340076492","https://openalex.org/W2349022273","https://openalex.org/W2411480360","https://openalex.org/W2469975815","https://openalex.org/W2483772785","https://openalex.org/W2505067803","https://openalex.org/W2529487057","https://openalex.org/W2561650451","https://openalex.org/W2620106252","https://openalex.org/W2623016866","https://openalex.org/W2724545582","https://openalex.org/W2746871167","https://openalex.org/W3023819163","https://openalex.org/W3125710003","https://openalex.org/W4249057194","https://openalex.org/W4285719527","https://openalex.org/W6991883878"],"related_works":["https://openalex.org/W1975522091","https://openalex.org/W2077105843","https://openalex.org/W2735130281","https://openalex.org/W1990309876","https://openalex.org/W2140286994","https://openalex.org/W79990711","https://openalex.org/W575672070","https://openalex.org/W4295935130","https://openalex.org/W1480947737","https://openalex.org/W2953368509"],"abstract_inverted_index":{"High-bandwidth":[0],"On-Package":[1],"Memory":[2],"(OPM)":[3],"innovates":[4],"the":[5,40,45],"conventional":[6],"memory":[7],"hierarchy":[8],"by":[9,68,107],"augmenting":[10],"a":[11,34,70,74,81],"new":[12,35],"on-package":[13],"layer":[14],"between":[15],"classic":[16],"on-chip":[17],"cache":[18],"and":[19,27,47,90,100,128],"off-chip":[20],"DRAM.":[21],"Due":[22],"to":[23,121],"its":[24],"relative":[25],"location":[26],"capacity,":[28],"OPM":[29,51],"is":[30,58],"often":[31],"used":[32],"as":[33],"type":[36],"of":[37,50,77,84],"LLC.":[38],"Despite":[39],"adaptation":[41],"in":[42],"modern":[43],"processors,":[44],"performance":[46],"power":[48],"impact":[49],"on":[52,92,97,102],"HPC":[53],"applications,":[54],"especially":[55],"scientific":[56,78],"kernels,":[57],"still":[59],"unknown.":[60],"In":[61],"this":[62,66],"paper,":[63],"we":[64,112],"fill":[65],"gap":[67],"conducting":[69],"comprehensive":[71],"evaluation":[72],"for":[73,116,125],"wide":[75],"spectrum":[76],"kernels":[79],"with":[80],"large":[82],"amount":[83],"representative":[85],"inputs,":[86],"including":[87],"dense,":[88],"sparse":[89],"medium,":[91],"two":[93],"Intel":[94],"OPMs:":[95],"eDRAM":[96],"multicore":[98],"Broadwell":[99],"MCDRAM":[101],"manycore":[103],"Knights":[104],"Landing.":[105],"Guided":[106],"our":[108],"general":[109],"optimization":[110],"models,":[111],"demonstrate":[113],"OPM's":[114],"effectiveness":[115],"easing":[117],"programmers'":[118],"tuning":[119],"efforts":[120],"reach":[122],"ideal":[123],"throughput":[124],"both":[126],"compute-bound":[127],"memory-bound":[129],"applications.":[130]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":8},{"year":2019,"cited_by_count":12},{"year":2018,"cited_by_count":17},{"year":2017,"cited_by_count":2}],"updated_date":"2026-03-29T08:15:47.926485","created_date":"2025-10-10T00:00:00"}
