{"id":"https://openalex.org/W2112532446","doi":"https://doi.org/10.1145/2687356","title":"Measuring Microarchitectural Details of Multi- and Many-Core Memory Systems through Microbenchmarking","display_name":"Measuring Microarchitectural Details of Multi- and Many-Core Memory Systems through Microbenchmarking","publication_year":2015,"publication_date":"2015-01-09","ids":{"openalex":"https://openalex.org/W2112532446","doi":"https://doi.org/10.1145/2687356","mag":"2112532446"},"language":"en","primary_location":{"id":"doi:10.1145/2687356","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2687356","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/2687356","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/2687356","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065889904","display_name":"Zhenman Fang","orcid":"https://orcid.org/0000-0003-0603-9697"},"institutions":[{"id":"https://openalex.org/I161318765","display_name":"University of California, Los Angeles","ror":"https://ror.org/046rm7j60","country_code":"US","type":"education","lineage":["https://openalex.org/I161318765"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Zhenman Fang","raw_affiliation_strings":["University of California Los Angeles"],"affiliations":[{"raw_affiliation_string":"University of California Los Angeles","institution_ids":["https://openalex.org/I161318765"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102742730","display_name":"Sanyam Mehta","orcid":"https://orcid.org/0009-0005-5319-689X"},"institutions":[{"id":"https://openalex.org/I130238516","display_name":"University of Minnesota","ror":"https://ror.org/017zqws13","country_code":"US","type":"education","lineage":["https://openalex.org/I130238516"]},{"id":"https://openalex.org/I4210101327","display_name":"Twin Cities Orthopedics","ror":"https://ror.org/01en4s460","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I4210101327"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sanyam Mehta","raw_affiliation_strings":["University of Minnesota, Twin Cities"],"affiliations":[{"raw_affiliation_string":"University of Minnesota, Twin Cities","institution_ids":["https://openalex.org/I4210101327","https://openalex.org/I130238516"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052005800","display_name":"Pen-Chung Yew","orcid":"https://orcid.org/0000-0001-9653-8777"},"institutions":[{"id":"https://openalex.org/I2800403580","display_name":"University of Minnesota System","ror":"https://ror.org/03grvy078","country_code":"US","type":"education","lineage":["https://openalex.org/I2800403580"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pen-Chung Yew","raw_affiliation_strings":["University of Minnesota"],"affiliations":[{"raw_affiliation_string":"University of Minnesota","institution_ids":["https://openalex.org/I2800403580"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053597061","display_name":"Antonia Zhai","orcid":"https://orcid.org/0000-0002-8921-1415"},"institutions":[{"id":"https://openalex.org/I2800403580","display_name":"University of Minnesota System","ror":"https://ror.org/03grvy078","country_code":"US","type":"education","lineage":["https://openalex.org/I2800403580"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Antonia Zhai","raw_affiliation_strings":["University of Minnesota"],"affiliations":[{"raw_affiliation_string":"University of Minnesota","institution_ids":["https://openalex.org/I2800403580"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005013409","display_name":"James Greensky","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]},{"id":"https://openalex.org/I4210158342","display_name":"Intel (United Kingdom)","ror":"https://ror.org/058cxws58","country_code":"GB","type":"company","lineage":["https://openalex.org/I1343180700","https://openalex.org/I4210158342"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"James Greensky","raw_affiliation_strings":["Intel","Intel#TAB#"],"affiliations":[{"raw_affiliation_string":"Intel","institution_ids":["https://openalex.org/I4210158342"]},{"raw_affiliation_string":"Intel#TAB#","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032540349","display_name":"Gautham Beeraka","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]},{"id":"https://openalex.org/I4210158342","display_name":"Intel (United Kingdom)","ror":"https://ror.org/058cxws58","country_code":"GB","type":"company","lineage":["https://openalex.org/I1343180700","https://openalex.org/I4210158342"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Gautham Beeraka","raw_affiliation_strings":["Intel","Intel#TAB#"],"affiliations":[{"raw_affiliation_string":"Intel","institution_ids":["https://openalex.org/I4210158342"]},{"raw_affiliation_string":"Intel#TAB#","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5031955160","display_name":"Binyu Zang","orcid":"https://orcid.org/0000-0002-1968-7645"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Binyu Zang","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5065889904"],"corresponding_institution_ids":["https://openalex.org/I161318765"],"apc_list":null,"apc_paid":null,"fwci":3.958,"has_fulltext":true,"cited_by_count":20,"citation_normalized_percentile":{"value":0.93764574,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"11","issue":"4","first_page":"1","last_page":"26"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8941053152084351},{"id":"https://openalex.org/keywords/multithreading","display_name":"Multithreading","score":0.556767463684082},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.5494012236595154},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.509171187877655},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.500903844833374},{"id":"https://openalex.org/keywords/xeon-phi","display_name":"Xeon Phi","score":0.4742676019668579},{"id":"https://openalex.org/keywords/non-uniform-memory-access","display_name":"Non-uniform memory access","score":0.44504615664482117},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.4410207271575928},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.42793500423431396},{"id":"https://openalex.org/keywords/extended-memory","display_name":"Extended memory","score":0.4206056296825409},{"id":"https://openalex.org/keywords/cas-latency","display_name":"CAS latency","score":0.4100978970527649},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3979036808013916},{"id":"https://openalex.org/keywords/cpu-cache","display_name":"CPU cache","score":0.3652689456939697},{"id":"https://openalex.org/keywords/memory-controller","display_name":"Memory controller","score":0.29997164011001587},{"id":"https://openalex.org/keywords/registered-memory","display_name":"Registered memory","score":0.26833996176719666},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.26515254378318787},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.17414554953575134},{"id":"https://openalex.org/keywords/semiconductor-memory","display_name":"Semiconductor memory","score":0.13972631096839905},{"id":"https://openalex.org/keywords/cache-coloring","display_name":"Cache coloring","score":0.13205787539482117}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8941053152084351},{"id":"https://openalex.org/C201410400","wikidata":"https://www.wikidata.org/wiki/Q1064412","display_name":"Multithreading","level":3,"score":0.556767463684082},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.5494012236595154},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.509171187877655},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.500903844833374},{"id":"https://openalex.org/C96972482","wikidata":"https://www.wikidata.org/wiki/Q1049168","display_name":"Xeon Phi","level":2,"score":0.4742676019668579},{"id":"https://openalex.org/C133371097","wikidata":"https://www.wikidata.org/wiki/Q868014","display_name":"Non-uniform memory access","level":5,"score":0.44504615664482117},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.4410207271575928},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.42793500423431396},{"id":"https://openalex.org/C171675096","wikidata":"https://www.wikidata.org/wiki/Q1143380","display_name":"Extended memory","level":4,"score":0.4206056296825409},{"id":"https://openalex.org/C189930140","wikidata":"https://www.wikidata.org/wiki/Q1112878","display_name":"CAS latency","level":4,"score":0.4100978970527649},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3979036808013916},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.3652689456939697},{"id":"https://openalex.org/C100800780","wikidata":"https://www.wikidata.org/wiki/Q1175867","display_name":"Memory controller","level":3,"score":0.29997164011001587},{"id":"https://openalex.org/C93446704","wikidata":"https://www.wikidata.org/wiki/Q449328","display_name":"Registered memory","level":3,"score":0.26833996176719666},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.26515254378318787},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.17414554953575134},{"id":"https://openalex.org/C98986596","wikidata":"https://www.wikidata.org/wiki/Q1143031","display_name":"Semiconductor memory","level":2,"score":0.13972631096839905},{"id":"https://openalex.org/C201148951","wikidata":"https://www.wikidata.org/wiki/Q5015976","display_name":"Cache coloring","level":4,"score":0.13205787539482117}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2687356","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2687356","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/2687356","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/2687356","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2687356","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/2687356","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.5299999713897705,"display_name":"Industry, innovation and infrastructure"}],"awards":[{"id":"https://openalex.org/G1798822627","display_name":"CPS: Medium: Embedded Fault Detection for Low-Cost, Safety-Critical  Systems","funder_award_id":"0931931","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5210035162","display_name":"An Integrated Design and CAD Approach for Efficient Power Delivery in Multicore Processors","funder_award_id":"0903427","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6012535679","display_name":"SHF: Small: In Vivo Software Monitoring: Architectural and Compiler Support","funder_award_id":"0916583","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7630411954","display_name":null,"funder_award_id":"CCF-0916583","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8662217532","display_name":"CRI: IAD  Exploiting Multicore Processor Technology for Interactive Supercomputing","funder_award_id":"0708822","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320309636","display_name":"University of Minnesota","ror":"https://ror.org/03grvy078"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2112532446.pdf","grobid_xml":"https://content.openalex.org/works/W2112532446.grobid-xml"},"referenced_works_count":26,"referenced_works":["https://openalex.org/W1597642815","https://openalex.org/W1819012263","https://openalex.org/W1964191343","https://openalex.org/W1976773142","https://openalex.org/W1978169377","https://openalex.org/W1996564291","https://openalex.org/W2033875825","https://openalex.org/W2036026652","https://openalex.org/W2036162037","https://openalex.org/W2063186542","https://openalex.org/W2096365807","https://openalex.org/W2099958604","https://openalex.org/W2110195531","https://openalex.org/W2113441357","https://openalex.org/W2118518065","https://openalex.org/W2125212667","https://openalex.org/W2127643746","https://openalex.org/W2132292283","https://openalex.org/W2141181087","https://openalex.org/W2147743629","https://openalex.org/W2152659795","https://openalex.org/W2165169256","https://openalex.org/W2166620913","https://openalex.org/W2264044109","https://openalex.org/W4248073216","https://openalex.org/W4255681033"],"related_works":["https://openalex.org/W2952807630","https://openalex.org/W4300876969","https://openalex.org/W2532617734","https://openalex.org/W2547220881","https://openalex.org/W1603816627","https://openalex.org/W2182219686","https://openalex.org/W2543262377","https://openalex.org/W3008068282","https://openalex.org/W4233816696","https://openalex.org/W396399311"],"abstract_inverted_index":{"As":[0],"multicore":[1,259,342],"and":[2,18,24,41,52,55,66,78,130,159,181,192,244,272,298,301,313,338,343],"many-core":[3,67,131,208,344],"architectures":[4],"evolve,":[5],"their":[6],"memory":[7,61,72,92,125,150,233,335],"systems":[8],"are":[9,36,42,97,236,270],"becoming":[10],"increasingly":[11],"more":[12],"complex.":[13],"To":[14,240],"bridge":[15],"the":[16,22,90,145,165,188,202,242,246,254,279,282,306,320],"latency":[17,180],"bandwidth":[19,183],"gap":[20],"between":[21],"processor":[23,209,260],"memory,":[25],"they":[26,48],"often":[27,98],"use":[28,221,253],"a":[29,112,222,250,287],"mix":[30],"of":[31,89,138,148,167,190,224,231,248,281,340],"multilevel":[32],"private/shared":[33],"caches":[34],"that":[35,142,303,327],"either":[37],"blocking":[38],"or":[39],"nonblocking":[40],"connected":[43],"by":[44,304],"high-speed":[45],"network-on-chip.":[46],"Moreover,":[47,276],"also":[49,185,330],"incorporate":[50,70],"hardware":[51,193],"software":[53,191],"prefetching":[54,194,291],"simultaneous":[56],"multithreading":[57],"(SMT)":[58],"to":[59,69,85,122,157,226,277,319],"hide":[60],"latency.":[62],"On":[63],"such":[64,95,149,249,341],"multi-":[65,129],"systems,":[68],"various":[71,228],"optimization":[73],"schemes":[74],"using":[75,305],"compiler":[76],"optimizations":[77],"performance":[79,315],"tuning":[80],"techniques,":[81],"it":[82],"is":[83],"crucial":[84],"have":[86],"microarchitectural":[87,126,229],"details":[88,96,127,189],"target":[91],"system.":[93],"Unfortunately,":[94],"unavailable":[99],"from":[100,238],"vendors,":[101],"especially":[102],"for":[103],"newly":[104,203],"released":[105,204],"processors.":[106,132],"In":[107],"this":[108],"article,":[109],"we":[110,216,219,252,285,309],"propose":[111],"novel":[113],"microbenchmarking":[114],"methodology":[115,173],"based":[116],"on":[117,294],"short":[118],"elapsed-time":[119],"events":[120],"(SETEs)":[121],"obtain":[123],"comprehensive":[124],"in":[128,198],"This":[133],"approach":[134],"requires":[135],"detailed":[136],"analysis":[137],"potential":[139],"interfering":[140,162],"factors":[141],"could":[143],"affect":[144],"intended":[146],"behavior":[147],"systems.":[151],"We":[152,325],"lay":[153],"out":[154],"effective":[155],"guidelines":[156],"control":[158],"mitigate":[160],"those":[161],"factors.":[163],"Taking":[164],"impact":[166],"SMT":[168],"into":[169,334],"consideration,":[170],"our":[171],"proposed":[172],"not":[174,196],"only":[175],"can":[176,186,220,273,310],"measure":[177],"traditional":[178],"cache/memory":[179],"off-chip":[182],"but":[184],"uncover":[187],"units":[195],"attempted":[197],"previous":[199],"studies.":[200],"Using":[201],"Intel":[205,256,322],"Xeon":[206,296],"Phi":[207,297],"(with":[210,261],"in-order":[211],"cores)":[212,263],"as":[213,264],"an":[214],"example,":[215,266],"show":[217,302],"how":[218],"set":[223],"microbenchmarks":[225],"determine":[227],"features":[230],"its":[232],"system":[234],"(many":[235],"undocumented":[237],"vendors).":[239],"demonstrate":[241],"portability":[243],"validate":[245],"correctness":[247],"methodology,":[251],"well-documented":[255],"Sandy":[257,299],"Bridge":[258,300],"out-of-order":[262],"another":[265],"where":[267],"most":[268],"data":[269,290],"available":[271],"be":[274],"validated.":[275],"illustrate":[278],"usefulness":[280],"measured":[283,307],"data,":[284,308],"do":[286],"multistage":[288],"coordinated":[289],"case":[292],"study":[293],"both":[295],"achieve":[311],"1.3X":[312],"1.08X":[314],"speedup,":[316],"respectively,":[317],"compared":[318],"state-of-the-art":[321],"ICC":[323],"compiler.":[324],"believe":[326],"these":[328],"measurements":[329],"provide":[331],"useful":[332],"insights":[333],"optimization,":[336],"analysis,":[337],"modeling":[339],"architectures.":[345]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":2},{"year":2017,"cited_by_count":6},{"year":2016,"cited_by_count":5},{"year":2015,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
