{"id":"https://openalex.org/W2090261334","doi":"https://doi.org/10.1142/s0218126612400087","title":"CODE TRANSFORMATIONS FOR ENHANCING THE PERFORMANCE OF SPECULATIVELY PARALLEL THREADS","display_name":"CODE TRANSFORMATIONS FOR ENHANCING THE PERFORMANCE OF SPECULATIVELY PARALLEL THREADS","publication_year":2012,"publication_date":"2012-04-01","ids":{"openalex":"https://openalex.org/W2090261334","doi":"https://doi.org/10.1142/s0218126612400087","mag":"2090261334"},"language":"en","primary_location":{"id":"doi:10.1142/s0218126612400087","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0218126612400087","pdf_url":null,"source":{"id":"https://openalex.org/S167602672","display_name":"Journal of Circuits Systems and Computers","issn_l":"0218-1266","issn":["0218-1266","1793-6454"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Circuits, Systems and Computers","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5076180875","display_name":"Shengyue Wang","orcid":"https://orcid.org/0000-0001-9589-7719"},"institutions":[{"id":"https://openalex.org/I1342911587","display_name":"Oracle (United States)","ror":"https://ror.org/006c77m33","country_code":"US","type":"company","lineage":["https://openalex.org/I1342911587"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"SHENGYUE WANG","raw_affiliation_strings":["Oracle Corporation, Santa Clara, California, 95054, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Oracle Corporation, Santa Clara, California, 95054, USA","institution_ids":["https://openalex.org/I1342911587"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052005800","display_name":"Pen-Chung Yew","orcid":"https://orcid.org/0000-0001-9653-8777"},"institutions":[{"id":"https://openalex.org/I130238516","display_name":"University of Minnesota","ror":"https://ror.org/017zqws13","country_code":"US","type":"education","lineage":["https://openalex.org/I130238516"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"PEN-CHUNG YEW","raw_affiliation_strings":["Department of Computer Science and Engineering, University of Minnesota, Minneapolis, Minnesota, 55455, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University of Minnesota, Minneapolis, Minnesota, 55455, USA","institution_ids":["https://openalex.org/I130238516"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5053597061","display_name":"Antonia Zhai","orcid":"https://orcid.org/0000-0002-8921-1415"},"institutions":[{"id":"https://openalex.org/I130238516","display_name":"University of Minnesota","ror":"https://ror.org/017zqws13","country_code":"US","type":"education","lineage":["https://openalex.org/I130238516"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"ANTONIA ZHAI","raw_affiliation_strings":["Department of Computer Science and Engineering, University of Minnesota, Minneapolis, Minnesota, 55455, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University of Minnesota, Minneapolis, Minnesota, 55455, USA","institution_ids":["https://openalex.org/I130238516"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5076180875"],"corresponding_institution_ids":["https://openalex.org/I1342911587"],"apc_list":null,"apc_paid":null,"fwci":0.5858,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.68719479,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"21","issue":"02","first_page":"1240008","last_page":"1240008"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8945344090461731},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.7666693329811096},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7045078873634338},{"id":"https://openalex.org/keywords/serialization","display_name":"Serialization","score":0.6923263669013977},{"id":"https://openalex.org/keywords/control-flow","display_name":"Control flow","score":0.6376157999038696},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.5838075876235962},{"id":"https://openalex.org/keywords/speculative-multithreading","display_name":"Speculative multithreading","score":0.581323504447937},{"id":"https://openalex.org/keywords/synchronizing","display_name":"Synchronizing","score":0.5574672222137451},{"id":"https://openalex.org/keywords/instruction-set","display_name":"Instruction set","score":0.45955100655555725},{"id":"https://openalex.org/keywords/multithreading","display_name":"Multithreading","score":0.4404972195625305},{"id":"https://openalex.org/keywords/register-allocation","display_name":"Register allocation","score":0.41822001338005066},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.34047532081604004},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.21551242470741272}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8945344090461731},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.7666693329811096},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7045078873634338},{"id":"https://openalex.org/C52723943","wikidata":"https://www.wikidata.org/wiki/Q1127410","display_name":"Serialization","level":2,"score":0.6923263669013977},{"id":"https://openalex.org/C160191386","wikidata":"https://www.wikidata.org/wiki/Q868299","display_name":"Control flow","level":2,"score":0.6376157999038696},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.5838075876235962},{"id":"https://openalex.org/C15296174","wikidata":"https://www.wikidata.org/wiki/Q7575343","display_name":"Speculative multithreading","level":4,"score":0.581323504447937},{"id":"https://openalex.org/C162932704","wikidata":"https://www.wikidata.org/wiki/Q1058791","display_name":"Synchronizing","level":3,"score":0.5574672222137451},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.45955100655555725},{"id":"https://openalex.org/C201410400","wikidata":"https://www.wikidata.org/wiki/Q1064412","display_name":"Multithreading","level":3,"score":0.4404972195625305},{"id":"https://openalex.org/C128916667","wikidata":"https://www.wikidata.org/wiki/Q1343660","display_name":"Register allocation","level":3,"score":0.41822001338005066},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.34047532081604004},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.21551242470741272},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C761482","wikidata":"https://www.wikidata.org/wiki/Q118093","display_name":"Transmission (telecommunications)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1142/s0218126612400087","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0218126612400087","pdf_url":null,"source":{"id":"https://openalex.org/S167602672","display_name":"Journal of Circuits Systems and Computers","issn_l":"0218-1266","issn":["0218-1266","1793-6454"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Circuits, Systems and Computers","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320306087","display_name":"Semiconductor Research Corporation","ror":"https://ror.org/047z4n946"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W1491178396","https://openalex.org/W1494930385","https://openalex.org/W1749933618","https://openalex.org/W1869745331","https://openalex.org/W1899405913","https://openalex.org/W1974831646","https://openalex.org/W2002549773","https://openalex.org/W2009264559","https://openalex.org/W2010220656","https://openalex.org/W2024547254","https://openalex.org/W2052285774","https://openalex.org/W2057473983","https://openalex.org/W2095703459","https://openalex.org/W2100184706","https://openalex.org/W2103224933","https://openalex.org/W2106018697","https://openalex.org/W2107150212","https://openalex.org/W2109502721","https://openalex.org/W2112482891","https://openalex.org/W2113751407","https://openalex.org/W2122545654","https://openalex.org/W2122842533","https://openalex.org/W2125340270","https://openalex.org/W2127642645","https://openalex.org/W2134633067","https://openalex.org/W2135470015","https://openalex.org/W2136798749","https://openalex.org/W2149473197","https://openalex.org/W2155410188","https://openalex.org/W2164393230","https://openalex.org/W2165856991","https://openalex.org/W2169000705","https://openalex.org/W2537115708"],"related_works":["https://openalex.org/W2381891087","https://openalex.org/W1540718544","https://openalex.org/W2977376961","https://openalex.org/W2788049983","https://openalex.org/W2171607039","https://openalex.org/W1999755551","https://openalex.org/W2650626006","https://openalex.org/W2382921070","https://openalex.org/W2765149243","https://openalex.org/W2000490092"],"abstract_inverted_index":{"As":[0],"technology":[1],"advances,":[2],"microprocessors":[3],"that":[4,64,76,209,241],"integrate":[5],"multiple":[6],"cores":[7],"on":[8],"a":[9,26,31,74,193,214],"single":[10,27],"chip":[11],"are":[12,65],"becoming":[13],"increasingly":[14],"common.":[15],"How":[16],"to":[17,21,40,46,67,97,133,149,161,195,199],"use":[18],"these":[19,90],"processors":[20],"improve":[22,150,218,229],"the":[23,47,101,112,115,151,163,179,181,183,205,221,226,230,235,238],"performance":[24,79,231],"of":[25,103,118,192,204,220,237],"program":[28,139],"has":[29],"been":[30,244],"challenge.":[32],"For":[33],"general-purpose":[34,128],"applications,":[35],"it":[36,131],"is":[37,82],"especially":[38],"difficult":[39],"create":[41],"efficient":[42,86],"parallel":[43,87],"execution":[44],"due":[45,198],"complex":[48],"control":[49,121],"flow":[50,122],"and":[51,57,123,157,177,186],"ambiguous":[52],"data":[53,124],"dependences.":[54],"Thread-level":[55],"speculation":[56],"transactional":[58],"memory":[59,170],"provide":[60],"two":[61],"hardware":[62],"mechanisms":[63],"able":[66,96],"optimistically":[68],"parallelize":[69],"potentially":[70],"dependent":[71],"threads.":[72],"However,":[73],"compiler":[75,93],"performs":[77],"detailed":[78],"trade-off":[80],"analysis":[81],"essential":[83],"for":[84,89,234,246],"generating":[85,158],"programs":[88],"hardwares.":[91],"This":[92],"must":[94],"be":[95],"take":[98],"into":[99],"consideration":[100],"cost":[102],"intra-thread":[104],"as":[105,107],"well":[106],"inter-thread":[108],"value":[109,248],"communication.":[110,249],"On":[111,224],"other":[113],"hand,":[114],"ubiquitous":[116],"existence":[117],"complex,":[119],"input-dependent":[120],"dependence":[125],"patterns":[126],"in":[127],"applications":[129],"makes":[130],"impossible":[132],"have":[134,242],"one":[135],"technique":[136,212],"optimize":[137],"all":[138,219],"patterns.":[140],"In":[141],"this":[142],"paper,":[143],"we":[144],"propose":[145],"three":[146],"optimization":[147,211],"techniques":[148],"thread":[152],"performance:":[153],"(i)":[154],"scheduling":[155],"instruction":[156],"recovery":[159],"code":[160,180],"reduce":[162],"critical":[164],"forwarding":[165],"path":[166],"introduced":[167],"by":[168,232],"synchronizing":[169],"resident":[171],"values;":[172],"(ii)":[173],"identifying":[174],"reduction":[175],"variables":[176],"transforming":[178],"minimize":[182],"serializing":[184],"execution;":[185],"(iii)":[187],"dynamically":[188],"merging":[189],"consecutive":[190],"iterations":[191],"loop":[194],"avoid":[196],"stalls":[197],"unbalanced":[200],"workload.":[201],"Detailed":[202],"evaluation":[203],"proposed":[206,227],"mechanism":[207],"shows":[208],"each":[210],"improves":[213],"subset":[215],"but":[216],"none":[217],"SPEC2000":[222,239],"benchmarks.":[223],"average,":[225],"optimizations":[228],"7%":[233],"set":[236],"benchmarks":[240],"already":[243],"optimized":[245],"register-resident":[247]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2017,"cited_by_count":2},{"year":2013,"cited_by_count":2}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
