{"id":"https://openalex.org/W2919094923","doi":"https://doi.org/10.1145/3297156.3297201","title":"Control Divergence Optimization through Partial Warp Regrouping in GPGPUs","display_name":"Control Divergence Optimization through Partial Warp Regrouping in GPGPUs","publication_year":2018,"publication_date":"2018-12-08","ids":{"openalex":"https://openalex.org/W2919094923","doi":"https://doi.org/10.1145/3297156.3297201","mag":"2919094923"},"language":"en","primary_location":{"id":"doi:10.1145/3297156.3297201","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3297156.3297201","pdf_url":null,"source":{"id":"https://openalex.org/S4306523626","display_name":"Proceedings of the 2018 2nd International Conference on Computer Science and Artificial Intelligence","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 2nd International Conference on Computer Science and Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022768491","display_name":"Yaohua Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yaohua Yang","raw_affiliation_strings":["School of computer, National University of Defense, Technology"],"affiliations":[{"raw_affiliation_string":"School of computer, National University of Defense, Technology","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101891026","display_name":"Shiqing Zhang","orcid":"https://orcid.org/0000-0002-6690-3718"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shiqing Zhang","raw_affiliation_strings":["School of computer, National University of Defense, Technology"],"affiliations":[{"raw_affiliation_string":"School of computer, National University of Defense, Technology","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042802004","display_name":"Li Shen","orcid":"https://orcid.org/0000-0001-5659-3464"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Li Shen","raw_affiliation_strings":["School of computer, National University of Defense, Technology"],"affiliations":[{"raw_affiliation_string":"School of computer, National University of Defense, Technology","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5022768491"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":0.3378,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.53846154,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"369","last_page":"374"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8443756103515625},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7066307067871094},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6912083625793457},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.6665423512458801},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.6569568514823914},{"id":"https://openalex.org/keywords/partition","display_name":"Partition (number theory)","score":0.5948424935340881},{"id":"https://openalex.org/keywords/divergence","display_name":"Divergence (linguistics)","score":0.4933735430240631},{"id":"https://openalex.org/keywords/acceleration","display_name":"Acceleration","score":0.4711191654205322},{"id":"https://openalex.org/keywords/running-time","display_name":"Running time","score":0.46388158202171326},{"id":"https://openalex.org/keywords/execution-time","display_name":"Execution time","score":0.45969241857528687},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.2157895565032959},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.16170963644981384}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8443756103515625},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7066307067871094},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6912083625793457},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.6665423512458801},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.6569568514823914},{"id":"https://openalex.org/C42812","wikidata":"https://www.wikidata.org/wiki/Q1082910","display_name":"Partition (number theory)","level":2,"score":0.5948424935340881},{"id":"https://openalex.org/C207390915","wikidata":"https://www.wikidata.org/wiki/Q1230525","display_name":"Divergence (linguistics)","level":2,"score":0.4933735430240631},{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.4711191654205322},{"id":"https://openalex.org/C3017489831","wikidata":"https://www.wikidata.org/wiki/Q2393193","display_name":"Running time","level":2,"score":0.46388158202171326},{"id":"https://openalex.org/C2989134064","wikidata":"https://www.wikidata.org/wiki/Q288510","display_name":"Execution time","level":2,"score":0.45969241857528687},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.2157895565032959},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.16170963644981384},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C74650414","wikidata":"https://www.wikidata.org/wiki/Q11397","display_name":"Classical mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3297156.3297201","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3297156.3297201","pdf_url":null,"source":{"id":"https://openalex.org/S4306523626","display_name":"Proceedings of the 2018 2nd International Conference on Computer Science and Artificial Intelligence","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 2nd International Conference on Computer Science and Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","score":0.5799999833106995,"display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W1970815868","https://openalex.org/W1971997351","https://openalex.org/W1979527452","https://openalex.org/W2047060659","https://openalex.org/W2050710108","https://openalex.org/W2080592089","https://openalex.org/W2090584832","https://openalex.org/W2098505406","https://openalex.org/W2106815796","https://openalex.org/W2148443481","https://openalex.org/W2155568054","https://openalex.org/W2156831150","https://openalex.org/W2160428323","https://openalex.org/W2169880332","https://openalex.org/W2236252626","https://openalex.org/W2237622290","https://openalex.org/W2511318867","https://openalex.org/W2765771514","https://openalex.org/W3146509083","https://openalex.org/W4206561618"],"related_works":["https://openalex.org/W2082509836","https://openalex.org/W1822025191","https://openalex.org/W2102668194","https://openalex.org/W2212845119","https://openalex.org/W1709331905","https://openalex.org/W4297260888","https://openalex.org/W3012609114","https://openalex.org/W2003327526","https://openalex.org/W1838573249","https://openalex.org/W2048012077"],"abstract_inverted_index":{"Recent":[0],"GPUs":[1],"has":[2],"been":[3],"widely":[4],"used":[5],"for":[6],"high":[7],"performance":[8,83],"acceleration":[9],"in":[10,30,110],"the":[11,21,36,45,102,117],"general":[12],"purpose":[13],"computing":[14],"area,":[15],"which":[16],"is":[17],"mainly":[18],"because":[19,72],"of":[20,78,101,119],"SIMT":[22],"(Single-Instruction,":[23],"Multiple-Thread)":[24],"execution":[25,38],"model.":[26],"However,":[27],"when":[28],"threads":[29],"a":[31,76,90],"warp":[32,52,92,104],"do":[33],"not":[34,65],"follow":[35],"same":[37],"path,":[39],"control":[40],"divergence":[41],"generates":[42],"and":[43],"affects":[44],"hardware":[46,120],"utilization.":[47],"Prior":[48],"work":[49],"suggests":[50],"that":[51,64,73,98,126],"regrouping":[53,93,105],"can":[54,68,115,128],"mitigate":[55],"this":[56,86,111],"impact":[57],"to":[58],"some":[59],"extent.":[60],"But":[61],"we":[62,88],"observe":[63],"all":[66],"warps":[67],"be":[69],"regrouped":[70],"effectively":[71],"may":[74],"introduce":[75],"lot":[77],"unnecessary":[79,103],"overheads":[80],"limiting":[81],"further":[82],"improvement.":[84],"In":[85],"paper,":[87],"propose":[89],"lightweight":[91],"method-Partial":[94],"Warp":[95],"Regrouping":[96],"(PWR)":[97],"avoids":[99],"most":[100],"by":[106],"setting":[107],"thresholds.":[108],"And":[109],"method,":[112],"it":[113],"also":[114],"reduce":[116],"complexity":[118],"design.":[121],"Our":[122],"experimental":[123],"evaluation":[124],"shows":[125],"PWR":[127],"provide":[129],"12%":[130],"speedup":[131],"on":[132],"average.":[133]},"counts_by_year":[{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
