{"id":"https://openalex.org/W2344044374","doi":"https://doi.org/10.1109/tcad.2015.2501303","title":"An Accurate GPU Performance Model for Effective Control Flow Divergence Optimization","display_name":"An Accurate GPU Performance Model for Effective Control Flow Divergence Optimization","publication_year":2015,"publication_date":"2015-11-17","ids":{"openalex":"https://openalex.org/W2344044374","doi":"https://doi.org/10.1109/tcad.2015.2501303","mag":"2344044374"},"language":"en","primary_location":{"id":"doi:10.1109/tcad.2015.2501303","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcad.2015.2501303","pdf_url":null,"source":{"id":"https://openalex.org/S100835903","display_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","issn_l":"0278-0070","issn":["0278-0070","1937-4151"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100604860","display_name":"Yun Liang","orcid":"https://orcid.org/0000-0002-9076-7998"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]},{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yun Liang","raw_affiliation_strings":["Center for Energy-Efficient Computing and Applications, School of Electronics Engineering and Computer Science, Peking University, Beijing, China","Collaborative Innovation Center of High Performance Computing, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"Center for Energy-Efficient Computing and Applications, School of Electronics Engineering and Computer Science, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Collaborative Innovation Center of High Performance Computing, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046734969","display_name":"Muhammad T. Satria","orcid":null},"institutions":[{"id":"https://openalex.org/I4210110242","display_name":"Digital Science (United States)","ror":"https://ror.org/020h4b682","country_code":"US","type":"company","lineage":["https://openalex.org/I4210110242","https://openalex.org/I4210112888","https://openalex.org/I4210118830"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Muhammad Teguh Satria","raw_affiliation_strings":["Advanced Digital Science Center, Illinois at Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Advanced Digital Science Center, Illinois at Singapore, Singapore","institution_ids":["https://openalex.org/I4210110242"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090843153","display_name":"Kyle Rupnow","orcid":"https://orcid.org/0000-0003-2908-2225"},"institutions":[{"id":"https://openalex.org/I4210110242","display_name":"Digital Science (United States)","ror":"https://ror.org/020h4b682","country_code":"US","type":"company","lineage":["https://openalex.org/I4210110242","https://openalex.org/I4210112888","https://openalex.org/I4210118830"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kyle Rupnow","raw_affiliation_strings":["Advanced Digital Science Center, Illinois at Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Advanced Digital Science Center, Illinois at Singapore, Singapore","institution_ids":["https://openalex.org/I4210110242"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056321228","display_name":"Deming Chen","orcid":"https://orcid.org/0000-0002-3016-0270"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Deming Chen","raw_affiliation_strings":["University of Illinois at Urbana\u2013Champaign, Champaign, IL, USA"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana\u2013Champaign, Champaign, IL, USA","institution_ids":["https://openalex.org/I157725225"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100604860"],"corresponding_institution_ids":["https://openalex.org/I170215575","https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":2.2609,"has_fulltext":false,"cited_by_count":19,"citation_normalized_percentile":{"value":0.88636568,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"35","issue":"7","first_page":"1165","last_page":"1178"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8264561891555786},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.8192788362503052},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.8166418075561523},{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.7487845420837402},{"id":"https://openalex.org/keywords/divergence","display_name":"Divergence (linguistics)","score":0.6664378643035889},{"id":"https://openalex.org/keywords/control-flow","display_name":"Control flow","score":0.6324854493141174},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.5151047110557556},{"id":"https://openalex.org/keywords/instruction-set","display_name":"Instruction set","score":0.43452176451683044},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.12405431270599365}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8264561891555786},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.8192788362503052},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.8166418075561523},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.7487845420837402},{"id":"https://openalex.org/C207390915","wikidata":"https://www.wikidata.org/wiki/Q1230525","display_name":"Divergence (linguistics)","level":2,"score":0.6664378643035889},{"id":"https://openalex.org/C160191386","wikidata":"https://www.wikidata.org/wiki/Q868299","display_name":"Control flow","level":2,"score":0.6324854493141174},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.5151047110557556},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.43452176451683044},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.12405431270599365},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcad.2015.2501303","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcad.2015.2501303","pdf_url":null,"source":{"id":"https://openalex.org/S100835903","display_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","issn_l":"0278-0070","issn":["0278-0070","1937-4151"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G483414983","display_name":null,"funder_award_id":"61300005","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320306087","display_name":"Semiconductor Research Corporation","ror":"https://ror.org/047z4n946"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W1971997351","https://openalex.org/W1972663160","https://openalex.org/W1973538724","https://openalex.org/W1982996921","https://openalex.org/W2012110315","https://openalex.org/W2013247896","https://openalex.org/W2014661416","https://openalex.org/W2016579022","https://openalex.org/W2033524811","https://openalex.org/W2065493976","https://openalex.org/W2074140323","https://openalex.org/W2081373884","https://openalex.org/W2084309410","https://openalex.org/W2090584832","https://openalex.org/W2092698325","https://openalex.org/W2096661534","https://openalex.org/W2099019585","https://openalex.org/W2106018915","https://openalex.org/W2109393352","https://openalex.org/W2110195531","https://openalex.org/W2113282196","https://openalex.org/W2125979435","https://openalex.org/W2128329055","https://openalex.org/W2129232868","https://openalex.org/W2152517358","https://openalex.org/W2156467710","https://openalex.org/W2156831150","https://openalex.org/W2160428323","https://openalex.org/W2166621589","https://openalex.org/W2167334577","https://openalex.org/W2169880332","https://openalex.org/W2229412420","https://openalex.org/W2231791608","https://openalex.org/W2232645663","https://openalex.org/W4233857083","https://openalex.org/W4237024478","https://openalex.org/W4244280501","https://openalex.org/W4251590233","https://openalex.org/W6653336569","https://openalex.org/W6666852886","https://openalex.org/W6678660376","https://openalex.org/W6689165056"],"related_works":["https://openalex.org/W2111180768","https://openalex.org/W2162270818","https://openalex.org/W2994245508","https://openalex.org/W2028931088","https://openalex.org/W4242172182","https://openalex.org/W2082875307","https://openalex.org/W4237780868","https://openalex.org/W4285302443","https://openalex.org/W4396938741","https://openalex.org/W2019451907"],"abstract_inverted_index":{"Graphic":[0],"processing":[1],"units":[2],"(GPUs)":[3],"are":[4,18,44,89],"composed":[5],"of":[6,9,96,126,135,145,149,163,200,215],"a":[7,54,169],"group":[8],"single-instruction":[10],"multiple":[11],"data":[12,24,86,109],"(SIMD)":[13],"streaming":[14],"multiprocessors":[15],"(SMs).":[16],"GPUs":[17],"able":[19],"to":[20,92,101,110],"efficiently":[21],"execute":[22],"highly":[23,60],"parallel":[25,61],"tasks":[26],"through":[27],"SIMD":[28],"execution":[29],"on":[30,224,226],"the":[31,48,59,94,123,146,161,177,183,188,213],"SMs.":[32],"However,":[33,114],"if":[34],"those":[35],"threads":[36,107,184],"take":[37],"diverging":[38],"control":[39,56,70],"paths,":[40],"all":[41],"divergent":[42],"paths":[43],"executed":[45],"serially.":[46],"In":[47],"worst":[49],"case,":[50],"every":[51],"thread":[52,153,208],"takes":[53],"different":[55],"path":[57],"and":[58,85,151,175,181,217],"architecture":[62],"is":[63,74],"used":[64,91],"serially":[65],"by":[66,104,222],"each":[67],"thread.":[68],"This":[69],"flow":[71],"divergence":[72,103,129,136,150,158,170],"problem":[73],"well":[75],"known":[76],"in":[77],"GPU":[78],"development;":[79],"code":[80],"transformation,":[81],"memory":[82],"access":[83],"redirection,":[84],"layout":[87],"reorganization":[88],"commonly":[90],"reduce":[93],"impact":[95,125,148,162,214],"divergence.":[97,165],"These":[98],"techniques":[99,119],"attempt":[100],"eliminate":[102,157],"grouping":[105],"together":[106],"or":[108,130,159],"ensure":[111],"identical":[112],"behavior.":[113],"prior":[115],"efforts":[116],"using":[117],"these":[118,204,220],"do":[120],"not":[121,138],"model":[122],"performance":[124,147,195],"any":[127],"particular":[128],"consider":[131,212],"that":[132,156,173],"complete":[133],"elimination":[134],"may":[137],"be":[139],"possible.":[140],"Thus,":[141],"we":[142,167,206],"perform":[143],"analysis":[144],"potential":[152],"regrouping":[154,209],"algorithms":[155],"minimize":[160],"remaining":[164],"Finally,":[166],"develop":[168,207],"optimization":[171],"framework":[172],"analyzes":[174],"transforms":[176],"kernel":[178],"at":[179,185],"compile-time":[180],"regroups":[182],"runtime.":[186],"For":[187],"compute-bound":[189],"applications,":[190],"our":[191],"proposed":[192],"metrics":[193],"achieve":[194],"estimation":[196],"accuracy":[197],"within":[198],"6.2%":[199],"measured":[201],"performance.":[202],"Using":[203],"metrics,":[205],"algorithms,":[210],"which":[211],"divergence,":[216],"speed":[218],"up":[219],"applications":[221],"2.2\u00d7":[223],"average":[225],"NVIDIA":[227],"GTX480.":[228]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":4},{"year":2016,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
