{"id":"https://openalex.org/W1979593478","doi":"https://doi.org/10.1145/2355585.2355586","title":"Dynamically dispatching speculative threads to improve sequential execution","display_name":"Dynamically dispatching speculative threads to improve sequential execution","publication_year":2012,"publication_date":"2012-09-01","ids":{"openalex":"https://openalex.org/W1979593478","doi":"https://doi.org/10.1145/2355585.2355586","mag":"1979593478"},"language":"en","primary_location":{"id":"doi:10.1145/2355585.2355586","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2355585.2355586","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/2355585.2355586","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/2355585.2355586","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036219025","display_name":"Yangchun Luo","orcid":null},"institutions":[{"id":"https://openalex.org/I1311921367","display_name":"Advanced Micro Devices (Canada)","ror":"https://ror.org/02yh0k313","country_code":"CA","type":"company","lineage":["https://openalex.org/I1311921367","https://openalex.org/I4210137977"]},{"id":"https://openalex.org/I4210137977","display_name":"Advanced Micro Devices (United States)","ror":"https://ror.org/04kd6c783","country_code":"US","type":"company","lineage":["https://openalex.org/I4210137977"]}],"countries":["CA","US"],"is_corresponding":true,"raw_author_name":"Yangchun Luo","raw_affiliation_strings":["Advanced Micro Devices, Sunnyvale, CA","Advanced Micro Devices, Sunnyvale, CA#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Advanced Micro Devices, Sunnyvale, CA","institution_ids":["https://openalex.org/I4210137977"]},{"raw_affiliation_string":"Advanced Micro Devices, Sunnyvale, CA#TAB#","institution_ids":["https://openalex.org/I1311921367"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5053597061","display_name":"Antonia Zhai","orcid":"https://orcid.org/0000-0002-8921-1415"},"institutions":[{"id":"https://openalex.org/I130238516","display_name":"University of Minnesota","ror":"https://ror.org/017zqws13","country_code":"US","type":"education","lineage":["https://openalex.org/I130238516"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Antonia Zhai","raw_affiliation_strings":["University of Minnesota, Minneapolis, MN","University of Minnesota , Minneapolis, Mn"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Minnesota, Minneapolis, MN","institution_ids":["https://openalex.org/I130238516"]},{"raw_affiliation_string":"University of Minnesota , Minneapolis, Mn","institution_ids":["https://openalex.org/I130238516"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5036219025"],"corresponding_institution_ids":["https://openalex.org/I1311921367","https://openalex.org/I4210137977"],"apc_list":null,"apc_paid":null,"fwci":0.8787,"has_fulltext":true,"cited_by_count":7,"citation_normalized_percentile":{"value":0.72436245,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"9","issue":"3","first_page":"1","last_page":"31"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9118409752845764},{"id":"https://openalex.org/keywords/speculative-multithreading","display_name":"Speculative multithreading","score":0.9037623405456543},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.8717969655990601},{"id":"https://openalex.org/keywords/spec#","display_name":"Spec#","score":0.7033980488777161},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6899577975273132},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.6775071620941162},{"id":"https://openalex.org/keywords/speculative-execution","display_name":"Speculative execution","score":0.632504940032959},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.6226513385772705},{"id":"https://openalex.org/keywords/multithreading","display_name":"Multithreading","score":0.5357213616371155},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5295825004577637},{"id":"https://openalex.org/keywords/instruction-level-parallelism","display_name":"Instruction-level parallelism","score":0.4914001226425171},{"id":"https://openalex.org/keywords/execution-time","display_name":"Execution time","score":0.4259992837905884},{"id":"https://openalex.org/keywords/execution-model","display_name":"Execution model","score":0.4198416769504547},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.4093831181526184},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.32848864793777466},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.28123998641967773},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.22650480270385742}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9118409752845764},{"id":"https://openalex.org/C15296174","wikidata":"https://www.wikidata.org/wiki/Q7575343","display_name":"Speculative multithreading","level":4,"score":0.9037623405456543},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.8717969655990601},{"id":"https://openalex.org/C2778565505","wikidata":"https://www.wikidata.org/wiki/Q2207566","display_name":"Spec#","level":2,"score":0.7033980488777161},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6899577975273132},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.6775071620941162},{"id":"https://openalex.org/C141331961","wikidata":"https://www.wikidata.org/wiki/Q2164465","display_name":"Speculative execution","level":2,"score":0.632504940032959},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.6226513385772705},{"id":"https://openalex.org/C201410400","wikidata":"https://www.wikidata.org/wiki/Q1064412","display_name":"Multithreading","level":3,"score":0.5357213616371155},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5295825004577637},{"id":"https://openalex.org/C140763907","wikidata":"https://www.wikidata.org/wiki/Q2714055","display_name":"Instruction-level parallelism","level":3,"score":0.4914001226425171},{"id":"https://openalex.org/C2989134064","wikidata":"https://www.wikidata.org/wiki/Q288510","display_name":"Execution time","level":2,"score":0.4259992837905884},{"id":"https://openalex.org/C2776834041","wikidata":"https://www.wikidata.org/wiki/Q25346349","display_name":"Execution model","level":2,"score":0.4198416769504547},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.4093831181526184},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.32848864793777466},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.28123998641967773},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.22650480270385742},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2355585.2355586","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2355585.2355586","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/2355585.2355586","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/2355585.2355586","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2355585.2355586","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/2355585.2355586","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3490577158","display_name":null,"funder_award_id":"CNS-0834599, EIA-0220021","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4954983090","display_name":"ITR:   DREAM -- Dynamic Re-optimization for Application Migration","funder_award_id":"0220021","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5867244935","display_name":null,"funder_award_id":"SRC-2008-TJ-1819","funder_id":"https://openalex.org/F4320306087","funder_display_name":"Semiconductor Research Corporation"},{"id":"https://openalex.org/G6308350026","display_name":null,"funder_award_id":"CNS-0834599, EIA-0220021","funder_id":"https://openalex.org/F4320337388","funder_display_name":"Division of Computer and Network Systems"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320306087","display_name":"Semiconductor Research Corporation","ror":"https://ror.org/047z4n946"},{"id":"https://openalex.org/F4320337388","display_name":"Division of Computer and Network Systems","ror":"https://ror.org/02rdzmk74"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W1979593478.pdf","grobid_xml":"https://content.openalex.org/works/W1979593478.grobid-xml"},"referenced_works_count":54,"referenced_works":["https://openalex.org/W121029738","https://openalex.org/W1869745331","https://openalex.org/W1971965803","https://openalex.org/W1974831646","https://openalex.org/W1988888548","https://openalex.org/W2002549773","https://openalex.org/W2010220656","https://openalex.org/W2023128081","https://openalex.org/W2037462607","https://openalex.org/W2052285774","https://openalex.org/W2071130955","https://openalex.org/W2086833941","https://openalex.org/W2093597477","https://openalex.org/W2095703459","https://openalex.org/W2103224933","https://openalex.org/W2105121389","https://openalex.org/W2105528304","https://openalex.org/W2105864407","https://openalex.org/W2106018697","https://openalex.org/W2107138176","https://openalex.org/W2109502721","https://openalex.org/W2111905231","https://openalex.org/W2115351238","https://openalex.org/W2117515905","https://openalex.org/W2120274141","https://openalex.org/W2122545654","https://openalex.org/W2125340270","https://openalex.org/W2125623590","https://openalex.org/W2126605567","https://openalex.org/W2127643746","https://openalex.org/W2130327500","https://openalex.org/W2134633067","https://openalex.org/W2135470015","https://openalex.org/W2136798749","https://openalex.org/W2143068308","https://openalex.org/W2143207886","https://openalex.org/W2146246439","https://openalex.org/W2148589677","https://openalex.org/W2150547581","https://openalex.org/W2155410188","https://openalex.org/W2156358578","https://openalex.org/W2161992906","https://openalex.org/W2162523628","https://openalex.org/W2165856991","https://openalex.org/W2187874404","https://openalex.org/W2296006986","https://openalex.org/W2485549318","https://openalex.org/W2537115708","https://openalex.org/W4239813889","https://openalex.org/W4240765481","https://openalex.org/W4252742548","https://openalex.org/W4253095404","https://openalex.org/W4254256063","https://openalex.org/W4255602098"],"related_works":["https://openalex.org/W4247496551","https://openalex.org/W2135470015","https://openalex.org/W2101113316","https://openalex.org/W2913814439","https://openalex.org/W3012114143","https://openalex.org/W2294873030","https://openalex.org/W1936132780","https://openalex.org/W2005952754","https://openalex.org/W2542083118","https://openalex.org/W2090754905"],"abstract_inverted_index":{"Efficiently":[0],"utilizing":[1],"multicore":[2,63],"processors":[3],"to":[4,25,43,73,78,93,102,167,185,219],"improve":[5],"their":[6,130,145],"performance":[7,60,142,146,225],"potentials":[8],"demands":[9],"extracting":[10],"thread-level":[11,27],"parallelism":[12,28],"from":[13,29],"the":[14,62,71,86,94,110,123,133,169,176,179,195,203,208],"applications.":[15],"Various":[16],"novel":[17,152],"and":[18,54,113,144,183],"sophisticated":[19],"execution":[20,34,50],"models":[21],"have":[22,67],"been":[23],"proposed":[24,134],"extract":[26],"sequential":[30,75,220],"programs.":[31],"One":[32],"such":[33],"model,":[35],"Thread-Level":[36],"Speculation":[37],"(TLS),":[38],"allows":[39],"potentially":[40],"dependent":[41],"threads":[42,127,137,173,188],"execute":[44],"speculatively":[45],"in":[46,201],"parallel.":[47],"However,":[48],"TLS":[49,97,105,160],"is":[51,148,189],"inherently":[52],"unpredictable,":[53],"consequently":[55],"incorrect":[56],"speculation":[57],"could":[58],"degrade":[59],"for":[61],"systems.":[64],"Existing":[65],"approaches":[66],"focused":[68],"on":[69,129,216,227],"using":[70],"compilers":[72],"select":[74],"program":[76],"regions":[77],"apply":[79],"TLS.":[80],"Our":[81],"research":[82],"shows":[83],"that":[84,121,154],"even":[85],"state-of-the-art":[87,209],"compiler":[88],"makes":[89],"suboptimal":[90],"decisions,":[91],"due":[92],"unpredictability":[95],"of":[96,115,125,171,178],"execution.":[98],"Thus,":[99],"we":[100,222],"propose":[101],"dynamically":[103],"optimize":[104],"performance.":[106],"This":[107],"article":[108],"describes":[109],"design,":[111],"implementation,":[112],"evaluation":[114],"a":[116,151,228],"runtime":[117,180],"thread":[118,211],"dispatching":[119,163,205],"mechanism":[120],"adjusts":[122],"behaviors":[124,170],"speculative":[126,136,172,187],"based":[128],"efficiency.":[131],"In":[132],"system,":[135],"are":[138,165],"monitored":[139],"by":[140,214],"hardware-based":[141],"counters":[143],"impact":[147],"evaluated":[149],"with":[150,193],"methodology":[153],"takes":[155],"into":[156],"account":[157],"various":[158],"unique":[159],"characteristics.":[161],"Thread":[162],"policies":[164],"devised":[166],"adjust":[168],"accordingly.":[174],"With":[175],"help":[177],"evaluation,":[181],"where":[182],"how":[184],"create":[186],"better":[190],"determined.":[191],"Evaluated":[192],"all":[194],"SPEC":[196],"CPU2000":[197],"benchmark":[198],"programs":[199],"written":[200],"C,":[202],"dynamic":[204],"system":[206],"outperforms":[207],"compiler-based":[210],"management":[212],"techniques":[213],"9.4%":[215],"average.":[217],"Comparing":[218],"execution,":[221],"achieve":[223],"1.37X":[224],"improvement":[226],"four-core":[229],"CMP-based":[230],"system.":[231]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":1},{"year":2013,"cited_by_count":3}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
