{"id":"https://openalex.org/W2022750656","doi":"https://doi.org/10.1145/1926367.1926379","title":"Software-based branch predication for AMD GPUs","display_name":"Software-based branch predication for AMD GPUs","publication_year":2010,"publication_date":"2010-09-14","ids":{"openalex":"https://openalex.org/W2022750656","doi":"https://doi.org/10.1145/1926367.1926379","mag":"2022750656"},"language":"en","primary_location":{"id":"doi:10.1145/1926367.1926379","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1926367.1926379","pdf_url":null,"source":{"id":"https://openalex.org/S4210193905","display_name":"ACM SIGARCH Computer Architecture News","issn_l":"0163-5964","issn":["0163-5964","1943-5851"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320740","host_organization_name":"ACM SIGARCH","host_organization_lineage":["https://openalex.org/P4310320740"],"host_organization_lineage_names":["ACM SIGARCH"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM SIGARCH Computer Architecture News","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007820323","display_name":"Ryan Taylor","orcid":null},"institutions":[{"id":"https://openalex.org/I86501945","display_name":"University of Delaware","ror":"https://ror.org/01sbq1a82","country_code":"US","type":"education","lineage":["https://openalex.org/I86501945"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ryan Taylor","raw_affiliation_strings":["University of Delaware, Newark, DE"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Delaware, Newark, DE","institution_ids":["https://openalex.org/I86501945"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100452133","display_name":"Xiaoming Li","orcid":"https://orcid.org/0000-0002-7397-723X"},"institutions":[{"id":"https://openalex.org/I86501945","display_name":"University of Delaware","ror":"https://ror.org/01sbq1a82","country_code":"US","type":"education","lineage":["https://openalex.org/I86501945"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiaoming Li","raw_affiliation_strings":["University of Delaware, Newark, DE"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Delaware, Newark, DE","institution_ids":["https://openalex.org/I86501945"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I86501945"],"apc_list":null,"apc_paid":null,"fwci":0.7593,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.72457706,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"38","issue":"4","first_page":"66","last_page":"72"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8993709683418274},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7491840124130249},{"id":"https://openalex.org/keywords/very-long-instruction-word","display_name":"Very long instruction word","score":0.7228490710258484},{"id":"https://openalex.org/keywords/instruction-set","display_name":"Instruction set","score":0.6352522373199463},{"id":"https://openalex.org/keywords/control-flow","display_name":"Control flow","score":0.558196485042572},{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.5320305228233337},{"id":"https://openalex.org/keywords/branch-predictor","display_name":"Branch predictor","score":0.46160760521888733},{"id":"https://openalex.org/keywords/software-pipelining","display_name":"Software pipelining","score":0.44327327609062195},{"id":"https://openalex.org/keywords/instruction-level-parallelism","display_name":"Instruction-level parallelism","score":0.42056286334991455},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.3780038356781006},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.2992437779903412},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.15282022953033447}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8993709683418274},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7491840124130249},{"id":"https://openalex.org/C170595534","wikidata":"https://www.wikidata.org/wiki/Q249743","display_name":"Very long instruction word","level":2,"score":0.7228490710258484},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.6352522373199463},{"id":"https://openalex.org/C160191386","wikidata":"https://www.wikidata.org/wiki/Q868299","display_name":"Control flow","level":2,"score":0.558196485042572},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.5320305228233337},{"id":"https://openalex.org/C168522837","wikidata":"https://www.wikidata.org/wiki/Q679552","display_name":"Branch predictor","level":2,"score":0.46160760521888733},{"id":"https://openalex.org/C188854837","wikidata":"https://www.wikidata.org/wiki/Q268469","display_name":"Software pipelining","level":3,"score":0.44327327609062195},{"id":"https://openalex.org/C140763907","wikidata":"https://www.wikidata.org/wiki/Q2714055","display_name":"Instruction-level parallelism","level":3,"score":0.42056286334991455},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.3780038356781006},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2992437779903412},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.15282022953033447}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1926367.1926379","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1926367.1926379","pdf_url":null,"source":{"id":"https://openalex.org/S4210193905","display_name":"ACM SIGARCH Computer Architecture News","issn_l":"0163-5964","issn":["0163-5964","1943-5851"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320740","host_organization_name":"ACM SIGARCH","host_organization_lineage":["https://openalex.org/P4310320740"],"host_organization_lineage_names":["ACM SIGARCH"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM SIGARCH Computer Architecture News","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":7,"referenced_works":["https://openalex.org/W1992851788","https://openalex.org/W2040701288","https://openalex.org/W2089482246","https://openalex.org/W2153492376","https://openalex.org/W2163599246","https://openalex.org/W2169810797","https://openalex.org/W2295946175"],"related_works":["https://openalex.org/W2993981457","https://openalex.org/W2115688358","https://openalex.org/W2124831322","https://openalex.org/W1991837811","https://openalex.org/W25845550","https://openalex.org/W2515308961","https://openalex.org/W2152012934","https://openalex.org/W2161926026","https://openalex.org/W128821325","https://openalex.org/W2110550622"],"abstract_inverted_index":{"Branch":[0],"predication":[1,33,70,158,208],"is":[2,77,140,168,210],"a":[3,18,29,66,84,88,143,149,163,211,257],"program":[4,90,246],"transformation":[5,53,214],"technique":[6,71,209,254,268],"that":[7,42,78,156,167,216,248],"combines":[8],"instructions":[9,80,97,101,114,187],"of":[10,13,25,37,46,92,95,105,112,128,145,175,184,192,204,213,222,231,252,266],"multiple":[11,199],"branches":[12],"an":[14],"if":[15,135],"statement":[16],"into":[17,218],"straight-line":[19],"sequence":[20,27],"and":[21,58,102,124,178,198,226,245,255,272],"associates":[22],"each":[23],"instruction":[24,56,161,232],"the":[26,35,55,93,103,121,129,172,182,190,220,223,234,250,262],"with":[28,195,236],"predicate.":[30],"The":[31,74,202,264],"branch":[32,38,69,79,96,157,200,207],"improves":[34,54],"execution":[36,45,174,230],"statements":[39],"on":[40,233,269],"processors":[41],"support":[43],"predicated":[44,229],"instruction,":[47],"e.g.,":[48],"Intel":[49],"IA-64,":[50],"because":[51,91],"such":[52],"scheduling":[57],"might":[59],"help":[60],"cache":[61],"performance.":[62],"This":[63],"paper":[64,153],"proposes":[65],"novel":[67,206],"software-based":[68,228],"for":[72,87,261],"GPU.":[73],"main":[75],"motivation":[76],"can":[81,132,159,179],"easily":[82],"become":[83],"performance":[85,191],"bottleneck":[86],"GPU":[89,165,193,224,235],"cost":[94],"compared":[98],"to":[99,110,120,170,238],"ALU":[100,107,113],"possibility":[104],"low":[106],"utilization":[108],"due":[109],"separation":[111],"within":[115],"control":[116,185],"flow":[117,186],"blocks.":[118],"Due":[119],"SIMD":[122],"nature":[123],"massive":[125],"multi-threading":[126],"architecture":[127,225],"GPU,":[130],"branching":[131],"be":[133],"costly":[134],"more":[136],"than":[137],"one":[138],"path":[139],"taken":[141],"by":[142],"set":[144,212],"concurrent":[146],"threads":[147],"in":[148],"kernel.":[150],"In":[151],"this":[152],"we":[154,242],"reveal":[155],"enable":[160],"packing,":[162],"VLIW-like":[164],"feature":[166],"designed":[169],"increase":[171],"parallel":[173],"independent":[176],"instructions,":[177],"also":[180],"decrease":[181],"number":[183],"thereby":[188],"improving":[189],"kernels":[194],"both":[196],"single":[197],"paths.":[201],"key":[203],"our":[205,253,267],"rules":[215],"takes":[217],"consideration":[219],"specialties":[221],"implements":[227],"little":[237],"no":[239],"overhead.":[240],"Furthermore,":[241],"identify":[243],"architectural":[244],"factors":[247],"affect":[249],"effectiveness":[251],"build":[256],"benefit":[258],"analysis":[259],"model":[260],"transformation.":[263],"implementation":[265],"synthetic":[270],"benchmarks":[271],"real-world":[273],"application":[274],"proves":[275],"its":[276],"effectiveness.":[277]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":1}],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2025-10-10T00:00:00"}
