{"id":"https://openalex.org/W2148808145","doi":"https://doi.org/10.1109/ipdps.2004.1303320","title":"Identifying performance bottlenecks on modern microarchitectures using an adaptable probe","display_name":"Identifying performance bottlenecks on modern microarchitectures using an adaptable probe","publication_year":2004,"publication_date":"2004-06-10","ids":{"openalex":"https://openalex.org/W2148808145","doi":"https://doi.org/10.1109/ipdps.2004.1303320","mag":"2148808145"},"language":"en","primary_location":{"id":"doi:10.1109/ipdps.2004.1303320","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipdps.2004.1303320","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"18th International Parallel and Distributed Processing Symposium, 2004. Proceedings.","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5001916658","display_name":"G. Griem","orcid":null},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"G. Griem","raw_affiliation_strings":["Lawrence Berkeley National Laboratory, Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"Lawrence Berkeley National Laboratory, Berkeley, CA, USA","institution_ids":["https://openalex.org/I148283060"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113842004","display_name":"Leonid Oliker","orcid":"https://orcid.org/0000-0002-7923-2896"},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"L. Oliker","raw_affiliation_strings":["Lawrence Berkeley National Laboratory, Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"Lawrence Berkeley National Laboratory, Berkeley, CA, USA","institution_ids":["https://openalex.org/I148283060"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010873686","display_name":"John Shalf","orcid":"https://orcid.org/0000-0002-0608-3690"},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"J. Shalf","raw_affiliation_strings":["Lawrence Berkeley National Laboratory, Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"Lawrence Berkeley National Laboratory, Berkeley, CA, USA","institution_ids":["https://openalex.org/I148283060"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5091741596","display_name":"Katherine Yelick","orcid":"https://orcid.org/0000-0003-0957-701X"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]},{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"K. Yelick","raw_affiliation_strings":["Computer Science Division, University of California, Berkeley, CA, USA","Lawrence Berkeley National Laboratory, Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"Computer Science Division, University of California, Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"Lawrence Berkeley National Laboratory, Berkeley, CA, USA","institution_ids":["https://openalex.org/I148283060"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5001916658"],"corresponding_institution_ids":["https://openalex.org/I148283060"],"apc_list":null,"apc_paid":null,"fwci":2.1064,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.87002479,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"255","last_page":"262"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10558","display_name":"Advancements in Semiconductor Devices and Circuit Design","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8026536107063293},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.7817615866661072},{"id":"https://openalex.org/keywords/ibm","display_name":"IBM","score":0.7281816005706787},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6950328946113586},{"id":"https://openalex.org/keywords/indirection","display_name":"Indirection","score":0.6887544989585876},{"id":"https://openalex.org/keywords/microprocessor","display_name":"Microprocessor","score":0.5952802896499634},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4605817496776581},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.44109997153282166},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.4069683253765106},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.38620468974113464},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.32214269042015076}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8026536107063293},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.7817615866661072},{"id":"https://openalex.org/C70388272","wikidata":"https://www.wikidata.org/wiki/Q5968558","display_name":"IBM","level":2,"score":0.7281816005706787},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6950328946113586},{"id":"https://openalex.org/C89377073","wikidata":"https://www.wikidata.org/wiki/Q1171224","display_name":"Indirection","level":2,"score":0.6887544989585876},{"id":"https://openalex.org/C2780728072","wikidata":"https://www.wikidata.org/wiki/Q5297","display_name":"Microprocessor","level":2,"score":0.5952802896499634},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4605817496776581},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.44109997153282166},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.4069683253765106},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.38620468974113464},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.32214269042015076},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C171250308","wikidata":"https://www.wikidata.org/wiki/Q11468","display_name":"Nanotechnology","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/ipdps.2004.1303320","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipdps.2004.1303320","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"18th International Parallel and Distributed Processing Symposium, 2004. Proceedings.","raw_type":"proceedings-article"},{"id":"pmh:oai:escholarship.org/ark:/13030/qt9203n4p5","is_oa":false,"landing_page_url":"https://escholarship.org/uc/item/9203n4p5","pdf_url":null,"source":{"id":"https://openalex.org/S4306400115","display_name":"eScholarship (California Digital Library)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2801248553","host_organization_name":"California Digital Library","host_organization_lineage":["https://openalex.org/I2801248553"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"article"},{"id":"pmh:qt9203n4p5","is_oa":false,"landing_page_url":"http://www.escholarship.org/uc/item/9203n4p5","pdf_url":null,"source":{"id":"https://openalex.org/S4306400115","display_name":"eScholarship (California Digital Library)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2801248553","host_organization_name":"California Digital Library","host_organization_lineage":["https://openalex.org/I2801248553"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Griem, Gorden; Oliker, Leonid; Shalf, John; &amp; Yelick, Katherine. (2004). Identifying performance bottlenecks on modern microarchitectures using \\nan adaptable probe. Lawrence Berkeley National Laboratory. Lawrence Berkeley National Laboratory: Lawrence Berkeley National Laboratory. Retrieved from: http://www.escholarship.org/uc/item/9203n4p5","raw_type":"article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5199999809265137,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":3,"referenced_works":["https://openalex.org/W1481683432","https://openalex.org/W2015598590","https://openalex.org/W2098432400"],"related_works":["https://openalex.org/W2118132537","https://openalex.org/W2131787664","https://openalex.org/W56218782","https://openalex.org/W1810016699","https://openalex.org/W4240556436","https://openalex.org/W2495336520","https://openalex.org/W2595172197","https://openalex.org/W4324141066","https://openalex.org/W2084856301","https://openalex.org/W2127970246"],"abstract_inverted_index":{"Summary":[0],"form":[1],"only":[2,88],"given.":[3],"The":[4,24],"gap":[5],"between":[6],"peak":[7],"and":[8,76,103,112,139],"delivered":[9],"performance":[10,30,131],"for":[11,128],"scientific":[12,96],"applications":[13],"running":[14],"on":[15,32],"microprocessor-based":[16],"systems":[17],"has":[18],"grown":[19],"considerably":[20],"in":[21,71],"recent":[22],"years.":[23],"inability":[25],"to":[26,39,62,91,117],"achieve":[27],"the":[28,108],"desired":[29],"even":[31],"a":[33,50,68],"single":[34],"processor":[35],"is":[36],"often":[37],"attributed":[38],"an":[40,58],"inadequate":[41],"memory":[42],"system,":[43],"but":[44],"without":[45],"identification":[46],"or":[47],"quantification":[48],"of":[49,95,110],"specific":[51],"bottleneck.":[52],"In":[53],"this":[54],"work,":[55],"we":[56],"use":[57],"adaptable":[59,83],"synthetic":[60],"benchmark":[61],"isolate":[63],"application":[64,74],"characteristics":[65,94],"that":[66,123],"cause":[67],"significant":[69],"drop":[70],"performance,":[72],"giving":[73],"programmers":[75],"architects":[77],"information":[78],"about":[79],"possible":[80],"optimizations.":[81],"Our":[82],"probe,":[84],"called":[85],"sqmat,":[86],"uses":[87,113],"four":[89,119],"parameters":[90,116],"capture":[92],"key":[93],"workloads:":[97],"working-set":[98],"size,":[99],"computational":[100],"intensity,":[101],"indirection,":[102],"irregularity.":[104],"This":[105],"paper":[106],"describes":[107],"implementation":[109],"sqmat":[111],"its":[114],"tunable":[115],"evaluate":[118],"leading":[120],"64-bit":[121],"microprocessors":[122],"are":[124],"popular":[125],"building":[126],"blocks":[127],"current":[129],"high":[130],"systems:":[132],"Intel":[133],"Itanium2,":[134],"AMD":[135],"Opteron,":[136],"IBM":[137,140],"Power3,":[138],"Power4.":[141]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
