{"id":"https://openalex.org/W2125979435","doi":"https://doi.org/10.1109/hpca.2013.6522352","title":"The dual-path execution model for efficient GPU control flow","display_name":"The dual-path execution model for efficient GPU control flow","publication_year":2013,"publication_date":"2013-02-01","ids":{"openalex":"https://openalex.org/W2125979435","doi":"https://doi.org/10.1109/hpca.2013.6522352","mag":"2125979435"},"language":"en","primary_location":{"id":"doi:10.1109/hpca.2013.6522352","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca.2013.6522352","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2013 IEEE 19th International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5091648103","display_name":"Minsoo Rhu","orcid":"https://orcid.org/0000-0003-3303-8681"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Minsoo Rhu","raw_affiliation_strings":["Electrical and Computer Engineering Department, University of Texas at Austin, USA","Electr. & Comput. Eng. Dept., Univ. of Texas at Austin, Austin, TX, USA"],"affiliations":[{"raw_affiliation_string":"Electrical and Computer Engineering Department, University of Texas at Austin, USA","institution_ids":["https://openalex.org/I86519309"]},{"raw_affiliation_string":"Electr. & Comput. Eng. Dept., Univ. of Texas at Austin, Austin, TX, USA","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5013680653","display_name":"Mattan Erez","orcid":"https://orcid.org/0000-0002-1567-4097"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"M. Erez","raw_affiliation_strings":["Electrical and Computer Engineering Department, University of Texas at Austin, USA","Electr. & Comput. Eng. Dept., Univ. of Texas at Austin, Austin, TX, USA"],"affiliations":[{"raw_affiliation_string":"Electrical and Computer Engineering Department, University of Texas at Austin, USA","institution_ids":["https://openalex.org/I86519309"]},{"raw_affiliation_string":"Electr. & Comput. Eng. Dept., Univ. of Texas at Austin, Austin, TX, USA","institution_ids":["https://openalex.org/I86519309"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5091648103"],"corresponding_institution_ids":["https://openalex.org/I86519309"],"apc_list":null,"apc_paid":null,"fwci":10.4658,"has_fulltext":false,"cited_by_count":68,"citation_normalized_percentile":{"value":0.98501421,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"591","last_page":"602"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8271717429161072},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7284467220306396},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.6556620597839355},{"id":"https://openalex.org/keywords/control-flow","display_name":"Control flow","score":0.6333032846450806},{"id":"https://openalex.org/keywords/stack","display_name":"Stack (abstract data type)","score":0.6219313740730286},{"id":"https://openalex.org/keywords/call-stack","display_name":"Call stack","score":0.6146336793899536},{"id":"https://openalex.org/keywords/instruction-set","display_name":"Instruction set","score":0.48626458644866943},{"id":"https://openalex.org/keywords/concurrency","display_name":"Concurrency","score":0.47268277406692505},{"id":"https://openalex.org/keywords/serialization","display_name":"Serialization","score":0.4607885777950287},{"id":"https://openalex.org/keywords/path","display_name":"Path (computing)","score":0.4546849727630615},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.2994779944419861},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.18737304210662842},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.1716776192188263}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8271717429161072},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7284467220306396},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.6556620597839355},{"id":"https://openalex.org/C160191386","wikidata":"https://www.wikidata.org/wiki/Q868299","display_name":"Control flow","level":2,"score":0.6333032846450806},{"id":"https://openalex.org/C9395851","wikidata":"https://www.wikidata.org/wiki/Q177929","display_name":"Stack (abstract data type)","level":2,"score":0.6219313740730286},{"id":"https://openalex.org/C119024030","wikidata":"https://www.wikidata.org/wiki/Q759899","display_name":"Call stack","level":3,"score":0.6146336793899536},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.48626458644866943},{"id":"https://openalex.org/C193702766","wikidata":"https://www.wikidata.org/wiki/Q1414548","display_name":"Concurrency","level":2,"score":0.47268277406692505},{"id":"https://openalex.org/C52723943","wikidata":"https://www.wikidata.org/wiki/Q1127410","display_name":"Serialization","level":2,"score":0.4607885777950287},{"id":"https://openalex.org/C2777735758","wikidata":"https://www.wikidata.org/wiki/Q817765","display_name":"Path (computing)","level":2,"score":0.4546849727630615},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.2994779944419861},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.18737304210662842},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.1716776192188263}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/hpca.2013.6522352","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca.2013.6522352","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2013 IEEE 19th International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.648.3126","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.648.3126","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://lph.ece.utexas.edu/merez/uploads/MattanErez/hpca2013_dpe.pdf","raw_type":"text"},{"id":"pmh:oai:oasis.postech.ac.kr:2014.oak/43390","is_oa":false,"landing_page_url":"https://oasis.postech.ac.kr/handle/2014.oak/43390","pdf_url":null,"source":{"id":"https://openalex.org/S4306401965","display_name":"Open Access System for Information Sharing (Pohang University of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I123900574","host_organization_name":"Pohang University of Science and Technology","host_organization_lineage":["https://openalex.org/I123900574"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W1567324076","https://openalex.org/W1979527452","https://openalex.org/W2040469876","https://openalex.org/W2047060659","https://openalex.org/W2078983643","https://openalex.org/W2080592089","https://openalex.org/W2090584832","https://openalex.org/W2114067856","https://openalex.org/W2135947393","https://openalex.org/W2145866640","https://openalex.org/W2148443481","https://openalex.org/W2155568054","https://openalex.org/W2156540297","https://openalex.org/W2156831150","https://openalex.org/W2160428323","https://openalex.org/W2167675119","https://openalex.org/W2168921806","https://openalex.org/W2169880332","https://openalex.org/W2182505841","https://openalex.org/W2269491154","https://openalex.org/W2273348289","https://openalex.org/W3148394109","https://openalex.org/W4297838741"],"related_works":["https://openalex.org/W874095766","https://openalex.org/W2008860182","https://openalex.org/W2135849267","https://openalex.org/W2809457911","https://openalex.org/W1991206705","https://openalex.org/W2167049910","https://openalex.org/W2109219878","https://openalex.org/W332253968","https://openalex.org/W3013776785","https://openalex.org/W2617247280"],"abstract_inverted_index":{"Current":[0],"graphics":[1],"processing":[2],"units":[3],"(GPUs)":[4],"utilize":[5],"the":[6,27,45,66,69,75,91,109,119,146,156,183,188,204,250,270,273],"single":[7,31,56,99,163],"instruction":[8,33],"multiple":[9,92],"thread":[10],"(SIMT)":[11],"execution":[12,52,64,189,211,267,281],"model.":[13],"With":[14],"SIMT,":[15],"a":[16,30,35,55,61,85,98,162,180,200,235,239],"group":[17,28,46,70],"of":[18,47,65,118,154,190,238,241,272],"logical":[19],"threads":[20,25,67,123],"executes":[21],"such":[22],"that":[23,71,160,223,249],"all":[24],"in":[26,68,175,186,290],"execute":[29],"common":[32],"on":[34,74,84,284],"particular":[36],"cycle.":[37],"To":[38],"enable":[39,122],"control":[40,57,141,245],"flow":[41,58,104,142,246],"to":[42,89,96,121,124,182,203,219,259],"diverge":[43,113],"within":[44],"threads,":[48],"GPUs":[49,82],"partially":[50],"serialize":[51],"and":[53,95,114,126,222,247,286],"follow":[54],"path":[59,77,100,164,264],"at":[60],"time.":[62],"The":[63,131,152],"are":[72,106,115],"not":[73,169],"current":[76,81,220],"is":[78,159,165,199,225,254],"masked.":[79],"Most":[80],"rely":[83],"hardware":[86,185,221],"reconvergence":[87,136,157],"stack":[88,110,120,132,158,184,205,252,276],"track":[90],"concurrent":[93],"paths":[94,105,193],"choose":[97],"for":[101,137,262],"execution.":[102],"Control":[103],"pushed":[107],"onto":[108],"when":[111],"they":[112],"popped":[116],"off":[117],"reconverge":[125],"keep":[127],"lane":[128],"utilization":[129],"high.":[130],"algorithm":[133],"guarantees":[134],"optimal":[135,229],"applications":[138],"with":[139,215,243],"structured":[140,147],"as":[143],"it":[144],"traverses":[145],"control-flow":[148,231],"tree":[149],"depth":[150],"first.":[151],"downside":[153],"using":[155],"only":[161,216],"followed,":[166],"which":[167,187],"does":[168],"maximize":[170],"available":[171],"parallelism,":[172],"degrading":[173],"performance":[174,271],"some":[176,291],"cases.":[177,292],"We":[178,233],"propose":[179],"change":[181,202],"two":[191],"different":[192],"can":[194,212],"be":[195,213],"interleaved.":[196],"While":[197],"this":[198],"fundamental":[201],"concept,":[206],"we":[207],"show":[208],"how":[209],"dual-path":[210,251],"implemented":[214],"modest":[217],"changes":[218],"parallelism":[224],"increased":[226],"without":[227],"sacrificing":[228],"(structured)":[230],"reconvergence.":[232],"perform":[234],"detailed":[236],"evaluation":[237],"set":[240],"benchmarks":[242],"divergent":[244],"demonstrate":[248],"architecture":[253,277],"much":[255],"more":[256],"robust":[257],"compared":[258],"previous":[260],"approaches":[261],"increasing":[263],"parallelism.":[265],"Dual-path":[266],"either":[268],"matches":[269],"baseline":[274],"single-path":[275,280],"or":[278],"outperforms":[279],"by":[282,287],"14.9%":[283],"average":[285],"over":[288],"30%":[289]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":3},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":7},{"year":2017,"cited_by_count":6},{"year":2016,"cited_by_count":12},{"year":2015,"cited_by_count":12},{"year":2014,"cited_by_count":5},{"year":2013,"cited_by_count":4}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
