{"id":"https://openalex.org/W3007718266","doi":"https://doi.org/10.1145/3368826.3377918","title":"Optimizing occupancy and ILP on the GPU using a combinatorial approach","display_name":"Optimizing occupancy and ILP on the GPU using a combinatorial approach","publication_year":2020,"publication_date":"2020-02-21","ids":{"openalex":"https://openalex.org/W3007718266","doi":"https://doi.org/10.1145/3368826.3377918","mag":"3007718266"},"language":"en","primary_location":{"id":"doi:10.1145/3368826.3377918","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3368826.3377918","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3368826.3377918","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 18th ACM/IEEE International Symposium on Code Generation and Optimization","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3368826.3377918","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5009270700","display_name":"Ghassan Shobaki","orcid":"https://orcid.org/0000-0001-8727-671X"},"institutions":[{"id":"https://openalex.org/I43522216","display_name":"California State University, Sacramento","ror":"https://ror.org/03e26wv14","country_code":"US","type":"education","lineage":["https://openalex.org/I43522216"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Ghassan Shobaki","raw_affiliation_strings":["California State University at Sacramento, USA"],"affiliations":[{"raw_affiliation_string":"California State University at Sacramento, USA","institution_ids":["https://openalex.org/I43522216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055443277","display_name":"Austin Kerbow","orcid":"https://orcid.org/0000-0001-7314-5516"},"institutions":[{"id":"https://openalex.org/I4210137977","display_name":"Advanced Micro Devices (United States)","ror":"https://ror.org/04kd6c783","country_code":"US","type":"company","lineage":["https://openalex.org/I4210137977"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Austin Kerbow","raw_affiliation_strings":["Advanced Micro Devices, USA"],"affiliations":[{"raw_affiliation_string":"Advanced Micro Devices, USA","institution_ids":["https://openalex.org/I4210137977"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042060721","display_name":"Stanislav Mekhanoshin","orcid":null},"institutions":[{"id":"https://openalex.org/I4210137977","display_name":"Advanced Micro Devices (United States)","ror":"https://ror.org/04kd6c783","country_code":"US","type":"company","lineage":["https://openalex.org/I4210137977"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Stanislav Mekhanoshin","raw_affiliation_strings":["Advanced Micro Devices, USA"],"affiliations":[{"raw_affiliation_string":"Advanced Micro Devices, USA","institution_ids":["https://openalex.org/I4210137977"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5009270700"],"corresponding_institution_ids":["https://openalex.org/I43522216"],"apc_list":null,"apc_paid":null,"fwci":1.4143,"has_fulltext":true,"cited_by_count":16,"citation_normalized_percentile":{"value":0.79726491,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"133","last_page":"144"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11522","display_name":"VLSI and FPGA Design Techniques","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/occupancy","display_name":"Occupancy","score":0.8311070203781128},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6700130701065063},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5228070020675659},{"id":"https://openalex.org/keywords/combinatorial-analysis","display_name":"Combinatorial analysis","score":0.433694064617157},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.41126859188079834},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.3423677384853363},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.22847995162010193},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.16976100206375122},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.11771145462989807},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09010255336761475},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.08760467171669006},{"id":"https://openalex.org/keywords/architectural-engineering","display_name":"Architectural engineering","score":0.060864537954330444}],"concepts":[{"id":"https://openalex.org/C160331591","wikidata":"https://www.wikidata.org/wiki/Q7075743","display_name":"Occupancy","level":2,"score":0.8311070203781128},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6700130701065063},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5228070020675659},{"id":"https://openalex.org/C2987825933","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorial analysis","level":2,"score":0.433694064617157},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.41126859188079834},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.3423677384853363},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.22847995162010193},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.16976100206375122},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.11771145462989807},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09010255336761475},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.08760467171669006},{"id":"https://openalex.org/C170154142","wikidata":"https://www.wikidata.org/wiki/Q150737","display_name":"Architectural engineering","level":1,"score":0.060864537954330444}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3368826.3377918","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3368826.3377918","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3368826.3377918","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 18th ACM/IEEE International Symposium on Code Generation and Optimization","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3368826.3377918","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3368826.3377918","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3368826.3377918","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 18th ACM/IEEE International Symposium on Code Generation and Optimization","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2181105169","display_name":null,"funder_award_id":"1911235","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3007718266.pdf","grobid_xml":"https://content.openalex.org/works/W3007718266.grobid-xml"},"referenced_works_count":29,"referenced_works":["https://openalex.org/W69255761","https://openalex.org/W106298309","https://openalex.org/W1521227491","https://openalex.org/W2017069727","https://openalex.org/W2024598204","https://openalex.org/W2041909047","https://openalex.org/W2053505858","https://openalex.org/W2058372704","https://openalex.org/W2111130897","https://openalex.org/W2121056548","https://openalex.org/W2121088529","https://openalex.org/W2122230401","https://openalex.org/W2126830109","https://openalex.org/W2146995753","https://openalex.org/W2155695023","https://openalex.org/W2171595223","https://openalex.org/W2227036781","https://openalex.org/W2278840053","https://openalex.org/W2554796001","https://openalex.org/W2737572435","https://openalex.org/W2786544209","https://openalex.org/W2794150393","https://openalex.org/W2798970645","https://openalex.org/W2902566644","https://openalex.org/W2916770803","https://openalex.org/W2962770763","https://openalex.org/W4235568885","https://openalex.org/W4236145149","https://openalex.org/W4255927621"],"related_works":["https://openalex.org/W4282043467","https://openalex.org/W2105697914","https://openalex.org/W3093197249","https://openalex.org/W1968324288","https://openalex.org/W1980160788","https://openalex.org/W1656096860","https://openalex.org/W2095928260","https://openalex.org/W2268149564","https://openalex.org/W1984739956","https://openalex.org/W2763312740"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"the":[3,8,62,102,105,114,121,126,131,141,158,187,218,241],"first":[4,103,132],"general":[5],"solution":[6],"to":[7,60,146,164,199,202,206,209,214,217,228,233,240,258],"problem":[9,53,70,87],"of":[10,43,54,255],"optimizing":[11],"both":[12],"occupancy":[13,40,90,128],"and":[14,95,144,172,212,237],"Instruction-Level":[15],"Parallelism":[16],"(ILP)":[17],"when":[18],"compiling":[19],"for":[20,84,108,120],"a":[21,67,79,92,98,109,180,252],"Graphics":[22],"Processing":[23],"Unit":[24],"(GPU).":[25],"Exploiting":[26],"ILP":[27,96],"(minimizing":[28],"schedule":[29,123],"length)":[30],"requires":[31],"using":[32,36,155],"more":[33,37,246],"registers,":[34],"but":[35],"registers":[38],"decreases":[39],"(the":[41],"number":[42],"thread":[44],"groups":[45],"that":[46,124,178,186],"can":[47,249,261],"be":[48,250,262],"run":[49],"in":[50,71,113,130,140],"parallel).":[51],"The":[52,134,150,183,222],"balancing":[55],"these":[56],"two":[57],"conflicting":[58],"objectives":[59],"achieve":[61],"best":[63],"overall":[64],"performance":[65,152],"is":[66],"challenging":[68],"open":[69],"code":[72],"optimization.":[73],"In":[74,101],"this":[75,86],"paper,":[76],"we":[77],"present":[78],"two-pass":[80],"Branch-and-Bound":[81],"(B&B)":[82],"algorithm":[83,106,137,171,177,191],"solving":[85],"by":[88,197],"treating":[89],"as":[91,97],"primary":[93],"objective":[94],"secondary":[99],"objective.":[100],"pass,":[104],"searches":[107,119],"maximum-occupancy":[110],"schedule,":[111],"while":[112],"second":[115],"pass":[116],"it":[117],"iteratively":[118],"shortest":[122],"gives":[125],"maximum":[127],"found":[129],"pass.":[133],"proposed":[135,188],"scheduling":[136,166,170,176,190],"was":[138,153],"implemented":[139],"LLVM":[142],"compiler":[143],"applied":[145],"an":[147,173],"AMD":[148],"GPU.":[149],"algorithm\u2019s":[151],"evaluated":[154],"benchmarks":[156],"from":[157],"PlaidML":[159],"machine":[160],"learning":[161],"framework":[162],"relative":[163,201,208,216,227,232,239,257],"LLVM\u2019s":[165,203,229],"algorithm,":[167],"AMD\u2019s":[168,210,234,259],"production":[169,235],"existing":[174,219,242],"B&B":[175,189,220,243],"uses":[179],"different":[181],"approach.":[182],"results":[184],"show":[185],"speeds":[192],"up":[193,198,205,213],"almost":[194],"every":[195],"benchmark":[196],"35%":[200],"scheduler,":[204,230],"31%":[207],"scheduler":[211,236,260],"18%":[215],"scheduler.":[221,244],"geometric-mean":[223,253],"improvements":[224],"are":[225],"16.3%":[226],"5.5%":[231],"6.2%":[238],"If":[245],"compile":[247],"time":[248],"tolerated,":[251],"improvement":[254],"6.3%":[256],"achieved.":[263]},"counts_by_year":[{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":1}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
