{"id":"https://openalex.org/W2135868636","doi":"https://doi.org/10.1145/1504176.1504210","title":"Effective performance measurement and analysis of multithreaded applications","display_name":"Effective performance measurement and analysis of multithreaded applications","publication_year":2009,"publication_date":"2009-02-14","ids":{"openalex":"https://openalex.org/W2135868636","doi":"https://doi.org/10.1145/1504176.1504210","mag":"2135868636"},"language":"en","primary_location":{"id":"doi:10.1145/1504176.1504210","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1504176.1504210","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 14th ACM SIGPLAN symposium on Principles and practice of parallel programming","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047746050","display_name":"Nathan R. Tallent","orcid":"https://orcid.org/0000-0003-4297-3057"},"institutions":[{"id":"https://openalex.org/I74775410","display_name":"Rice University","ror":"https://ror.org/008zs3103","country_code":"US","type":"education","lineage":["https://openalex.org/I74775410"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nathan R. Tallent","raw_affiliation_strings":["Rice University, Houston, TX, USA","Rice Univ., Houston, TX, USA#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Rice University, Houston, TX, USA","institution_ids":["https://openalex.org/I74775410"]},{"raw_affiliation_string":"Rice Univ., Houston, TX, USA#TAB#","institution_ids":["https://openalex.org/I74775410"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5089709469","display_name":"John Mellor\u2010Crummey","orcid":"https://orcid.org/0000-0002-9026-5453"},"institutions":[{"id":"https://openalex.org/I74775410","display_name":"Rice University","ror":"https://ror.org/008zs3103","country_code":"US","type":"education","lineage":["https://openalex.org/I74775410"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"John M. Mellor-Crummey","raw_affiliation_strings":["Rice University, Houston, TX, USA","Rice Univ., Houston, TX, USA#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Rice University, Houston, TX, USA","institution_ids":["https://openalex.org/I74775410"]},{"raw_affiliation_string":"Rice Univ., Houston, TX, USA#TAB#","institution_ids":["https://openalex.org/I74775410"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":11.7958,"has_fulltext":false,"cited_by_count":110,"citation_normalized_percentile":{"value":0.98836109,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"229","last_page":"240"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9056186676025391},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.7803815007209778},{"id":"https://openalex.org/keywords/concurrency","display_name":"Concurrency","score":0.7319022417068481},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7219013571739197},{"id":"https://openalex.org/keywords/posix-threads","display_name":"POSIX Threads","score":0.7210437655448914},{"id":"https://openalex.org/keywords/multithreading","display_name":"Multithreading","score":0.7092671394348145},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.695743978023529},{"id":"https://openalex.org/keywords/context-switch","display_name":"Context switch","score":0.5706603527069092},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.47850051522254944},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.46514785289764404},{"id":"https://openalex.org/keywords/profiling","display_name":"Profiling (computer programming)","score":0.4623715877532959},{"id":"https://openalex.org/keywords/optimizing-compiler","display_name":"Optimizing compiler","score":0.45538854598999023},{"id":"https://openalex.org/keywords/yarn","display_name":"Yarn","score":0.4473428726196289},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.4444037079811096},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.4227040708065033},{"id":"https://openalex.org/keywords/programming-paradigm","display_name":"Programming paradigm","score":0.4134077727794647},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.36030593514442444}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9056186676025391},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.7803815007209778},{"id":"https://openalex.org/C193702766","wikidata":"https://www.wikidata.org/wiki/Q1414548","display_name":"Concurrency","level":2,"score":0.7319022417068481},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7219013571739197},{"id":"https://openalex.org/C41138395","wikidata":"https://www.wikidata.org/wiki/Q928112","display_name":"POSIX Threads","level":3,"score":0.7210437655448914},{"id":"https://openalex.org/C201410400","wikidata":"https://www.wikidata.org/wiki/Q1064412","display_name":"Multithreading","level":3,"score":0.7092671394348145},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.695743978023529},{"id":"https://openalex.org/C53833338","wikidata":"https://www.wikidata.org/wiki/Q1061424","display_name":"Context switch","level":2,"score":0.5706603527069092},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.47850051522254944},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.46514785289764404},{"id":"https://openalex.org/C187191949","wikidata":"https://www.wikidata.org/wiki/Q1138496","display_name":"Profiling (computer programming)","level":2,"score":0.4623715877532959},{"id":"https://openalex.org/C190902152","wikidata":"https://www.wikidata.org/wiki/Q1325106","display_name":"Optimizing compiler","level":3,"score":0.45538854598999023},{"id":"https://openalex.org/C2778787235","wikidata":"https://www.wikidata.org/wiki/Q49007","display_name":"Yarn","level":2,"score":0.4473428726196289},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4444037079811096},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.4227040708065033},{"id":"https://openalex.org/C34165917","wikidata":"https://www.wikidata.org/wiki/Q188267","display_name":"Programming paradigm","level":2,"score":0.4134077727794647},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.36030593514442444},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/1504176.1504210","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1504176.1504210","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 14th ACM SIGPLAN symposium on Principles and practice of parallel programming","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.152.6973","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.152.6973","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cs.rice.edu/~johnmc/papers/hpctoolkit-ppopp09.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.399.380","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.399.380","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.hpctoolkit.org/pubs/ppopp-2009-hpctoolkit-work-stealing.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W57462620","https://openalex.org/W94155810","https://openalex.org/W1554398046","https://openalex.org/W1598149995","https://openalex.org/W1965478356","https://openalex.org/W1981821541","https://openalex.org/W1983412169","https://openalex.org/W1988927353","https://openalex.org/W2005795572","https://openalex.org/W2020009542","https://openalex.org/W2072725684","https://openalex.org/W2074590804","https://openalex.org/W2083220703","https://openalex.org/W2084531306","https://openalex.org/W2101778912","https://openalex.org/W2125295642","https://openalex.org/W2127643746","https://openalex.org/W2139216275","https://openalex.org/W2142542363","https://openalex.org/W2142702271","https://openalex.org/W4238147959","https://openalex.org/W4248649658","https://openalex.org/W6632960179"],"related_works":["https://openalex.org/W4294310504","https://openalex.org/W1995705225","https://openalex.org/W2338363223","https://openalex.org/W2104688710","https://openalex.org/W2184902834","https://openalex.org/W2158978940","https://openalex.org/W2573076482","https://openalex.org/W4283366496","https://openalex.org/W2110105483","https://openalex.org/W3130341238"],"abstract_inverted_index":{"Understanding":[0],"why":[1],"the":[2,13,107,159,193,196,226],"performance":[3,41,180,232],"of":[4,15,31,43,114,142,198,218,228,254],"a":[5,18,29,98,112,216],"multithreaded":[6,44,184],"program":[7],"does":[8],"not":[9],"improve":[10],"linearly":[11],"with":[12,22],"number":[14],"cores":[16],"in":[17,201,225,252],"shared-memory":[19],"node":[20],"populated":[21],"one":[23,138],"or":[24,157],"more":[25],"multicore":[26],"processors":[27],"is":[28,100,162,210],"problem":[30],"growing":[32],"practical":[33],"importance.":[34],"This":[35,65,190],"paper":[36],"makes":[37],"three":[38],"contributions":[39],"to":[40,50,63,69,78,90,128,139,175,244],"analysis":[42],"programs.":[45,256],"First,":[46],"we":[47,87,120,172],"describe":[48,88,173,235],"how":[49,89,174],"measure":[51,91,176,245],"and":[52,61,84,92,117,134,166,177,203,213,239,246,250],"attribute":[53,93,178,247],"parallel":[54,94],"idleness,":[55,249],"namely,":[56],"where":[57,145,158],"threads":[58],"are":[59,168],"stalled":[60],"unable":[62],"work.":[64],"technique":[66],"applies":[67],"broadly":[68],"programming":[70,185],"models":[71,80],"ranging":[72],"from":[73],"explicit":[74],"threading":[75],"(e.g.,":[76],"Pthreads)":[77],"higher-level":[79],"such":[81,187],"as":[82,188,208],"Cilk":[83,255],"OpenMP.":[85],"Second,":[86],"overhead":[95,135,167,251],"--":[96],"when":[97],"thread":[99],"performing":[101],"miscellaneous":[102],"work":[103],"other":[104],"than":[105],"executing":[106],"user's":[108],"computation.":[109],"By":[110],"employing":[111],"combination":[113],"compiler":[115],"support":[116],"post-mortem":[118],"analysis,":[119],"incur":[121],"no":[122],"measurement":[123],"cost":[124],"beyond":[125],"normal":[126],"profiling":[127],"glean":[129],"this":[130,242],"information.":[131],"Using":[132],"idleness":[133,165],"metrics":[136,181],"enables":[137],"pinpoint":[140],"areas":[141],"an":[143],"application":[144],"concurrency":[146,200],"should":[147],"be":[148],"increased":[149],"(to":[150,154],"reduce":[151,155],"idleness),":[152],"decreased":[153],"overhead),":[156],"present":[160],"parallelization":[161],"hopeless":[163],"(where":[164],"both":[169],"high).":[170],"Third,":[171],"arbitrary":[179],"for":[182],"high-level":[183],"models,":[186],"Cilk.":[189],"requires":[191],"bridging":[192],"gap":[194],"between":[195],"expression":[197],"logical":[199],"programs":[202],"its":[204],"realization":[205],"at":[206],"run-time":[207],"it":[209],"adaptively":[211],"partitioned":[212],"scheduled":[214],"onto":[215],"pool":[217],"threads.":[219],"We":[220,234],"have":[221],"prototyped":[222],"these":[223],"ideas":[224],"context":[227],"Rice":[229],"University's":[230],"HPCToolkit":[231],"tools.":[233],"our":[236],"approach,":[237],"implementation,":[238],"experiences":[240],"applying":[241],"approach":[243],"work,":[248],"executions":[253]},"counts_by_year":[{"year":2023,"cited_by_count":4},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":7},{"year":2017,"cited_by_count":9},{"year":2016,"cited_by_count":3},{"year":2015,"cited_by_count":6},{"year":2014,"cited_by_count":14},{"year":2013,"cited_by_count":11},{"year":2012,"cited_by_count":12}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
