{"id":"https://openalex.org/W2804703043","doi":"https://doi.org/10.1504/ijaacs.2018.10013261","title":"Using program branch probability for the thread parallelisation of branch divergence on the CUDA platform","display_name":"Using program branch probability for the thread parallelisation of branch divergence on the CUDA platform","publication_year":2018,"publication_date":"2018-01-01","ids":{"openalex":"https://openalex.org/W2804703043","doi":"https://doi.org/10.1504/ijaacs.2018.10013261","mag":"2804703043"},"language":"en","primary_location":{"id":"doi:10.1504/ijaacs.2018.10013261","is_oa":false,"landing_page_url":"https://doi.org/10.1504/ijaacs.2018.10013261","pdf_url":null,"source":{"id":"https://openalex.org/S191948775","display_name":"International Journal of Autonomous and Adaptive Communications Systems","issn_l":"1754-8632","issn":["1754-8632","1754-8640"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310317825","host_organization_name":"Inderscience Publishers","host_organization_lineage":["https://openalex.org/P4310317825"],"host_organization_lineage_names":["Inderscience Publishers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Autonomous and Adaptive Communications Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102847882","display_name":"Caifeng Zou","orcid":"https://orcid.org/0000-0001-7842-0310"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Caifeng Zou","raw_affiliation_strings":["School of Computer Science and Engineering, South China, University of Technology, Guangzhou 510006, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, South China, University of Technology, Guangzhou 510006, China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056693481","display_name":"Huifang Deng","orcid":"https://orcid.org/0000-0001-6394-4496"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huifang Deng","raw_affiliation_strings":["School of Computer Science and Engineering, South China, University of Technology, Guangzhou 510006, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, South China, University of Technology, Guangzhou 510006, China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101155842","display_name":"Yao Hong","orcid":null},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hong Yao","raw_affiliation_strings":["School of Computer Science and Engineering, South China, University of Technology, Guangzhou 510006, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, South China, University of Technology, Guangzhou 510006, China","institution_ids":["https://openalex.org/I90610280"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5102847882"],"corresponding_institution_ids":["https://openalex.org/I90610280"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.05842092,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"11","issue":"2","first_page":"171","last_page":"171"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8722646236419678},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.8197307586669922},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7562763690948486},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.7311123609542847},{"id":"https://openalex.org/keywords/branch-predictor","display_name":"Branch predictor","score":0.5955855846405029},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.33981287479400635},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.30365580320358276}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8722646236419678},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.8197307586669922},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7562763690948486},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.7311123609542847},{"id":"https://openalex.org/C168522837","wikidata":"https://www.wikidata.org/wiki/Q679552","display_name":"Branch predictor","level":2,"score":0.5955855846405029},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.33981287479400635},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.30365580320358276}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1504/ijaacs.2018.10013261","is_oa":false,"landing_page_url":"https://doi.org/10.1504/ijaacs.2018.10013261","pdf_url":null,"source":{"id":"https://openalex.org/S191948775","display_name":"International Journal of Autonomous and Adaptive Communications Systems","issn_l":"1754-8632","issn":["1754-8632","1754-8640"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310317825","host_organization_name":"Inderscience Publishers","host_organization_lineage":["https://openalex.org/P4310317825"],"host_organization_lineage_names":["Inderscience Publishers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Autonomous and Adaptive Communications Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.41999998688697815,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W3213381848","https://openalex.org/W2017587301","https://openalex.org/W4242265082","https://openalex.org/W4205222143","https://openalex.org/W4249960043","https://openalex.org/W2005148983","https://openalex.org/W4386975487","https://openalex.org/W2096672917","https://openalex.org/W2009169896","https://openalex.org/W2392023973"],"abstract_inverted_index":{"Virtualisation":[0],"environment":[1],"can":[2,60,71,86,121],"bring":[3],"more":[4],"flexibility":[5],"for":[6,101,139],"parallel":[7],"optimisation.":[8],"In":[9],"view":[10],"of":[11,50,106,118,141],"this,":[12],"we":[13,133],"focus":[14],"on":[15,54],"the":[16,48,55,68,99,102],"divergent":[17,142],"branch":[18,27,110,127,143],"problem":[19],"within":[20],"a":[21,64,125,135],"SIMT":[22],"architecture,":[23],"where":[24],"threads":[25,45,89],"with":[26],"divergence":[28],"should":[29],"be":[30,61,72,122],"serially":[31],"executed.":[32],"Existing":[33],"approaches":[34],"are":[35],"normally":[36],"costly":[37],"and":[38,111,115],"not":[39],"so":[40],"satisfactory":[41],"in":[42,63,75],"vectorising":[43],"these":[44,58],"due":[46],"to":[47,94],"constraints":[49,59],"private":[51,69],"variables.":[52],"However,":[53],"other":[56],"hand,":[57],"released":[62],"virtualised":[65,81],"environment,":[66],"because":[67],"resources":[70],"avoided":[73],"putting":[74],"use":[76],"directly":[77],"by":[78,124],"applications.":[79],"For":[80],"CUDA":[82],"platforms,":[83],"our":[84],"approach":[85],"converge":[87],"isomorphic":[88],"into":[90],"same":[91],"redundant":[92,119],"warps":[93,120],"eliminate":[95],"divergence.":[96],"We":[97],"introduce":[98],"algorithms":[100],"thread":[103],"recombination":[104],"models":[105],"binary":[107],"branches,":[108],"single":[109],"multiple":[112],"branches":[113],"respectively,":[114],"each":[116],"number":[117],"determined":[123],"program":[126],"probability.":[128],"Without":[129],"redesigning":[130],"hardware":[131],"needed,":[132],"obtained":[134],"load":[136],"balance":[137],"schema":[138],"parallelisation":[140],"threads.":[144]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
