{"id":"https://openalex.org/W4404133985","doi":"https://doi.org/10.1145/3649329.3658462","title":"Control Flow Divergence Optimization by Exploiting Tensor Cores","display_name":"Control Flow Divergence Optimization by Exploiting Tensor Cores","publication_year":2024,"publication_date":"2024-06-23","ids":{"openalex":"https://openalex.org/W4404133985","doi":"https://doi.org/10.1145/3649329.3658462"},"language":"en","primary_location":{"id":"doi:10.1145/3649329.3658462","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3649329.3658462","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 61st ACM/IEEE Design Automation Conference","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030163245","display_name":"Weiguang Pang","orcid":"https://orcid.org/0000-0003-0208-4677"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]},{"id":"https://openalex.org/I152269853","display_name":"Qilu University of Technology","ror":"https://ror.org/04hyzq608","country_code":"CN","type":"education","lineage":["https://openalex.org/I152269853"]},{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]},{"id":"https://openalex.org/I4210142748","display_name":"Shandong Academy of Sciences","ror":"https://ror.org/04y8d6y55","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weiguang Pang","raw_affiliation_strings":["Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center (National Supercomputer Center in Jinan), Qilu University of Technology (Shandong Academy of Sciences), Jinan, Shandong, China","Shandong Provincial Key Laboratory of Computer Networks, Shandong Fundamental Research Center for Computer Science, Jinan, Shandong, China","University of Electronic Science and Technology of China, Chengdu, Sichuan, China"],"raw_orcid":"https://orcid.org/0000-0003-0208-4677","affiliations":[{"raw_affiliation_string":"Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center (National Supercomputer Center in Jinan), Qilu University of Technology (Shandong Academy of Sciences), Jinan, Shandong, China","institution_ids":["https://openalex.org/I152269853","https://openalex.org/I4210142748"]},{"raw_affiliation_string":"Shandong Provincial Key Laboratory of Computer Networks, Shandong Fundamental Research Center for Computer Science, Jinan, Shandong, China","institution_ids":["https://openalex.org/I154099455"]},{"raw_affiliation_string":"University of Electronic Science and Technology of China, Chengdu, Sichuan, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051642924","display_name":"Xu Jiang","orcid":"https://orcid.org/0000-0003-2675-2895"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xu Jiang","raw_affiliation_strings":["University of Electronic Science and Technology of China, Chengdu, Sichuan, China"],"raw_orcid":"https://orcid.org/0000-0003-2675-2895","affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China, Chengdu, Sichuan, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050517081","display_name":"Songran Liu","orcid":"https://orcid.org/0000-0002-9234-5799"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Songran Liu","raw_affiliation_strings":["School of Computer Science and Engineering, Northeastern Univeristy, Shenyang, Liaoning, China"],"raw_orcid":"https://orcid.org/0000-0002-9234-5799","affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Northeastern Univeristy, Shenyang, Liaoning, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100393688","display_name":"Lei Qiao","orcid":"https://orcid.org/0000-0002-2637-9683"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lei Qiao","raw_affiliation_strings":["Beijing Institute of Control Engineering, Beijing, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-2637-9683","affiliations":[{"raw_affiliation_string":"Beijing Institute of Control Engineering, Beijing, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006567056","display_name":"Kexue Fu","orcid":"https://orcid.org/0000-0003-1204-0942"},"institutions":[{"id":"https://openalex.org/I152269853","display_name":"Qilu University of Technology","ror":"https://ror.org/04hyzq608","country_code":"CN","type":"education","lineage":["https://openalex.org/I152269853"]},{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]},{"id":"https://openalex.org/I4210142748","display_name":"Shandong Academy of Sciences","ror":"https://ror.org/04y8d6y55","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kexue Fu","raw_affiliation_strings":["Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center (National Supercomputer Center in Jinan), Qilu University of Technology (Shandong Academy of Sciences), Jinan, Shandong, China","Shandong Provincial Key Laboratory of Computer Networks, Shandong Fundamental Research Center for Computer Science, Jinan, Shandong, China"],"raw_orcid":"https://orcid.org/0000-0003-1204-0942","affiliations":[{"raw_affiliation_string":"Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center (National Supercomputer Center in Jinan), Qilu University of Technology (Shandong Academy of Sciences), Jinan, Shandong, China","institution_ids":["https://openalex.org/I152269853","https://openalex.org/I4210142748"]},{"raw_affiliation_string":"Shandong Provincial Key Laboratory of Computer Networks, Shandong Fundamental Research Center for Computer Science, Jinan, Shandong, China","institution_ids":["https://openalex.org/I154099455"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012293028","display_name":"Longxiang Gao","orcid":"https://orcid.org/0000-0002-3026-7537"},"institutions":[{"id":"https://openalex.org/I152269853","display_name":"Qilu University of Technology","ror":"https://ror.org/04hyzq608","country_code":"CN","type":"education","lineage":["https://openalex.org/I152269853"]},{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]},{"id":"https://openalex.org/I4210142748","display_name":"Shandong Academy of Sciences","ror":"https://ror.org/04y8d6y55","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Longxiang Gao","raw_affiliation_strings":["Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center (National Supercomputer Center in Jinan), Qilu University of Technology (Shandong Academy of Sciences), Jinan, Shandong, China","Shandong Provincial Key Laboratory of Computer Networks, Shandong Fundamental Research Center for Computer Science, Jinan, Shandong, China"],"raw_orcid":"https://orcid.org/0000-0002-3026-7537","affiliations":[{"raw_affiliation_string":"Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center (National Supercomputer Center in Jinan), Qilu University of Technology (Shandong Academy of Sciences), Jinan, Shandong, China","institution_ids":["https://openalex.org/I152269853","https://openalex.org/I4210142748"]},{"raw_affiliation_string":"Shandong Provincial Key Laboratory of Computer Networks, Shandong Fundamental Research Center for Computer Science, Jinan, Shandong, China","institution_ids":["https://openalex.org/I154099455"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100748818","display_name":"Wang Yi","orcid":"https://orcid.org/0000-0002-2994-6110"},"institutions":[{"id":"https://openalex.org/I123387679","display_name":"Uppsala University","ror":"https://ror.org/048a87296","country_code":"SE","type":"education","lineage":["https://openalex.org/I123387679"]},{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN","SE"],"is_corresponding":false,"raw_author_name":"Wang Yi","raw_affiliation_strings":["School of Computer Science and Engineering, Northeastern Univeristy, Shenyang, Liaoning, China","Uppsala University, Uppsala, Uppsala, Sweden"],"raw_orcid":"https://orcid.org/0000-0002-2994-6110","affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Northeastern Univeristy, Shenyang, Liaoning, China","institution_ids":["https://openalex.org/I9224756"]},{"raw_affiliation_string":"Uppsala University, Uppsala, Uppsala, Sweden","institution_ids":["https://openalex.org/I123387679"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.443,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.62891207,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10481","display_name":"Computer Graphics and Visualization Techniques","score":0.979200005531311,"subfield":{"id":"https://openalex.org/subfields/1704","display_name":"Computer Graphics and Computer-Aided Design"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11206","display_name":"Model Reduction and Neural Networks","score":0.9700000286102295,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/divergence","display_name":"Divergence (linguistics)","score":0.5906357765197754},{"id":"https://openalex.org/keywords/tensor","display_name":"Tensor (intrinsic definition)","score":0.5292394161224365},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5158411860466003},{"id":"https://openalex.org/keywords/flow","display_name":"Flow (mathematics)","score":0.49750497937202454},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.219915509223938},{"id":"https://openalex.org/keywords/geometry","display_name":"Geometry","score":0.09835261106491089}],"concepts":[{"id":"https://openalex.org/C207390915","wikidata":"https://www.wikidata.org/wiki/Q1230525","display_name":"Divergence (linguistics)","level":2,"score":0.5906357765197754},{"id":"https://openalex.org/C155281189","wikidata":"https://www.wikidata.org/wiki/Q3518150","display_name":"Tensor (intrinsic definition)","level":2,"score":0.5292394161224365},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5158411860466003},{"id":"https://openalex.org/C38349280","wikidata":"https://www.wikidata.org/wiki/Q1434290","display_name":"Flow (mathematics)","level":2,"score":0.49750497937202454},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.219915509223938},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.09835261106491089},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3649329.3658462","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3649329.3658462","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 61st ACM/IEEE Design Automation Conference","raw_type":"proceedings-article"},{"id":"pmh:oai:figshare.com:article/27683382","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4377196282","display_name":"Figshare","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210132348","host_organization_name":"Figshare (United Kingdom)","host_organization_lineage":["https://openalex.org/I4210132348"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"}],"best_oa_location":{"id":"pmh:oai:figshare.com:article/27683382","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4377196282","display_name":"Figshare","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210132348","host_organization_name":"Figshare (United Kingdom)","host_organization_lineage":["https://openalex.org/I4210132348"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"},"sustainable_development_goals":[{"score":0.4099999964237213,"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W599934088","https://openalex.org/W2013247896","https://openalex.org/W2046594687","https://openalex.org/W2081373884","https://openalex.org/W2113282196","https://openalex.org/W2156831150","https://openalex.org/W2157371713","https://openalex.org/W2160428323","https://openalex.org/W2167691065","https://openalex.org/W2273440736","https://openalex.org/W2344044374","https://openalex.org/W2798724095","https://openalex.org/W2895519740","https://openalex.org/W3059763123","https://openalex.org/W3102510044","https://openalex.org/W4280633999","https://openalex.org/W4309672181","https://openalex.org/W4367043644","https://openalex.org/W6684599738"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2984005617"],"abstract_inverted_index":{"Kernels":[0],"are":[1],"scheduled":[2,23],"on":[3,85,98],"Graphics":[4],"Processing":[5],"Units":[6],"(GPUs)":[7],"in":[8,42,109],"the":[9,31,56,74,77,86,96,103,114],"granularity":[10],"of":[11,18,76],"GPU":[12],"warp,":[13],"which":[14,90],"is":[15,53],"a":[16,34,43,65,81,110,119],"bunch":[17],"threads":[19,32],"that":[20,102],"must":[21],"be":[22,107],"together.":[24],"When":[25],"executing":[26],"kernels":[27,97],"with":[28,95,113,118],"conditional":[29],"branches,":[30],"within":[33],"warp":[35],"may":[36],"execute":[37],"different":[38],"branches":[39],"sequentially,":[40],"resulting":[41],"considerable":[44],"utilization":[45],"loss":[46],"and":[47],"unpredictable":[48],"execution":[49,71],"time.":[50],"This":[51],"problem":[52,105],"known":[54],"as":[55],"control":[57,131],"flow":[58,132],"divergence.":[59],"In":[60],"this":[61,125],"work,":[62],"we":[63],"propose":[64],"novel":[66],"method":[67],"to":[68],"predict":[69],"threads'":[70],"path":[72],"before":[73],"launch":[75],"kernel":[78],"by":[79],"deploying":[80],"branch":[82],"prediction":[83],"network":[84],"GPU's":[87],"tensor":[88],"cores,":[89,100],"can":[91,106,127],"efficiently":[92],"parallel":[93],"run":[94],"CUDA":[99],"so":[101],"divergence":[104,133],"eased":[108],"large":[111],"extent":[112],"lowest":[115],"overhead.":[116],"Combined":[117],"well-designed":[120],"thread":[121],"data":[122],"reorganization":[123],"algorithm,":[124],"solution":[126],"better":[128],"mitigate":[129],"GPUs'":[130],"problem.":[134]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
