{"id":"https://openalex.org/W2098505406","doi":"https://doi.org/10.1145/2540708.2540718","title":"Divergence-aware warp scheduling","display_name":"Divergence-aware warp scheduling","publication_year":2013,"publication_date":"2013-12-07","ids":{"openalex":"https://openalex.org/W2098505406","doi":"https://doi.org/10.1145/2540708.2540718","mag":"2098505406"},"language":"en","primary_location":{"id":"doi:10.1145/2540708.2540718","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2540708.2540718","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 46th Annual IEEE/ACM International Symposium on Microarchitecture","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5038577900","display_name":"Timothy G. Rogers","orcid":"https://orcid.org/0009-0002-0736-3149"},"institutions":[{"id":"https://openalex.org/I141945490","display_name":"University of British Columbia","ror":"https://ror.org/03rmrcq20","country_code":"CA","type":"education","lineage":["https://openalex.org/I141945490"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Timothy G. Rogers","raw_affiliation_strings":["University of British Columbia","Department of Computer and Electrical Engineering, University of British Columbia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of British Columbia","institution_ids":["https://openalex.org/I141945490"]},{"raw_affiliation_string":"Department of Computer and Electrical Engineering, University of British Columbia","institution_ids":["https://openalex.org/I141945490"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055699570","display_name":"Mike O\u2019Connor","orcid":"https://orcid.org/0000-0003-0944-2393"},"institutions":[{"id":"https://openalex.org/I1304085615","display_name":"Nvidia (United Kingdom)","ror":"https://ror.org/02kr42612","country_code":"GB","type":"company","lineage":["https://openalex.org/I1304085615","https://openalex.org/I4210127875"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Mike O'Connor","raw_affiliation_strings":["NVIDIA Research","[NVIDIA Research]"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NVIDIA Research","institution_ids":["https://openalex.org/I1304085615"]},{"raw_affiliation_string":"[NVIDIA Research]","institution_ids":["https://openalex.org/I1304085615"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5026788167","display_name":"Tor M. Aamodt","orcid":"https://orcid.org/0000-0003-1161-692X"},"institutions":[{"id":"https://openalex.org/I141945490","display_name":"University of British Columbia","ror":"https://ror.org/03rmrcq20","country_code":"CA","type":"education","lineage":["https://openalex.org/I141945490"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Tor M. Aamodt","raw_affiliation_strings":["University of British Columbia","Department of Computer and Electrical Engineering, University of British Columbia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of British Columbia","institution_ids":["https://openalex.org/I141945490"]},{"raw_affiliation_string":"Department of Computer and Electrical Engineering, University of British Columbia","institution_ids":["https://openalex.org/I141945490"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":18.0784,"has_fulltext":false,"cited_by_count":149,"citation_normalized_percentile":{"value":0.99552187,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"99","last_page":"110"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.806186318397522},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7619689106941223},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7293114066123962},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.5500501394271851},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.5063294172286987},{"id":"https://openalex.org/keywords/schedule","display_name":"Schedule","score":0.4278334677219391},{"id":"https://openalex.org/keywords/cpu-cache","display_name":"CPU cache","score":0.4175780713558197},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.23415601253509521}],"concepts":[{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.806186318397522},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7619689106941223},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7293114066123962},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.5500501394271851},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5063294172286987},{"id":"https://openalex.org/C68387754","wikidata":"https://www.wikidata.org/wiki/Q7271585","display_name":"Schedule","level":2,"score":0.4278334677219391},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.4175780713558197},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.23415601253509521},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/2540708.2540718","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2540708.2540718","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 46th Annual IEEE/ACM International Symposium on Microarchitecture","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.723.8101","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.723.8101","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.ece.ubc.ca/%7Eaamodt/papers/tgrogers.micro2013.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.9200000166893005,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320309480","display_name":"Nvidia","ror":"https://ror.org/03jdj4y14"},{"id":"https://openalex.org/F4320334593","display_name":"Natural Sciences and Engineering Research Council of Canada","ror":"https://ror.org/01h531d29"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":44,"referenced_works":["https://openalex.org/W1575701986","https://openalex.org/W1979527452","https://openalex.org/W1983235612","https://openalex.org/W2006312753","https://openalex.org/W2020572638","https://openalex.org/W2029577083","https://openalex.org/W2047060659","https://openalex.org/W2048441570","https://openalex.org/W2059226577","https://openalex.org/W2080592089","https://openalex.org/W2090584832","https://openalex.org/W2091002291","https://openalex.org/W2092340922","https://openalex.org/W2093043622","https://openalex.org/W2096661534","https://openalex.org/W2109473404","https://openalex.org/W2124350608","https://openalex.org/W2126570805","https://openalex.org/W2128853364","https://openalex.org/W2142444503","https://openalex.org/W2146451305","https://openalex.org/W2149234156","https://openalex.org/W2150073849","https://openalex.org/W2155503253","https://openalex.org/W2155568054","https://openalex.org/W2156831150","https://openalex.org/W2166918318","https://openalex.org/W2169880332","https://openalex.org/W2172339043","https://openalex.org/W2175014797","https://openalex.org/W2296006986","https://openalex.org/W2333869848","https://openalex.org/W3007769800","https://openalex.org/W3013490664","https://openalex.org/W4205941017","https://openalex.org/W4214898277","https://openalex.org/W4229618172","https://openalex.org/W4234833047","https://openalex.org/W4235870392","https://openalex.org/W4237819810","https://openalex.org/W4247317410","https://openalex.org/W4249082578","https://openalex.org/W4253438286","https://openalex.org/W4255812374"],"related_works":["https://openalex.org/W2167303720","https://openalex.org/W2549803267","https://openalex.org/W2497617944","https://openalex.org/W1563139915","https://openalex.org/W2061075966","https://openalex.org/W3147501184","https://openalex.org/W4256652509","https://openalex.org/W2109715593","https://openalex.org/W2081416538","https://openalex.org/W2140219379"],"abstract_inverted_index":{"This":[0],"paper":[1],"uses":[2,98],"hardware":[3],"thread":[4],"scheduling":[5,80,90],"to":[6,31,41,101,114,125,134,166,183],"improve":[7],"the":[8,62,116,119,127,137,146,150,187],"performance":[9,138,176,204],"and":[10,58,141,160,190],"energy":[11],"efficiency":[12],"of":[13,55,64,118,129,139,153,174,186,214],"divergent":[14,170],"applications":[15],"on":[16,73,83,93,145,210],"GPUs.":[17],"We":[18,148,195],"propose":[19],"Divergence-Aware":[20],"Warp":[21],"Scheduling":[22,209],"(DAWS),":[23],"which":[24,77],"introduces":[25],"a":[26,168,175,200,211],"divergence-based":[27],"cache":[28,37,85,94],"footprint":[29],"predictor":[30],"estimate":[32],"how":[33],"much":[34],"L1":[35,120],"data":[36,106,121],"capacity":[38,117],"is":[39,112,164],"needed":[40],"capture":[42],"intra-warp":[43],"locality":[44,130],"in":[45,68],"loops.":[46],"Predictor":[47],"estimates":[48],"are":[49],"created":[50],"from":[51,132],"an":[52],"online":[53],"characterization":[54],"memory":[56,193],"divergence":[57,67],"runtime":[59],"information":[60],"about":[61],"level":[63],"control":[65],"flow":[66],"warps.":[69],"Unlike":[70],"prior":[71],"work":[72],"Cache-Conscious":[74,207],"Wavefront":[75,208],"Scheduling,":[76],"makes":[78,88],"reactive":[79],"decisions":[81,91],"based":[82,92],"detected":[84],"thrashing,":[86],"DAWS":[87,97,123,163,198],"proactive":[89],"usage":[95],"predictions.":[96],"these":[99],"predictions":[100],"schedule":[102],"warps":[103],"such":[104],"that":[105,162,179,197],"reused":[107],"by":[108],"active":[109],"scalar":[110],"threads":[111],"unlikely":[113],"exceed":[115],"cache.":[122],"attempts":[124],"shift":[126],"burden":[128],"management":[131],"software":[133],"hardware,":[135],"increasing":[136],"simpler":[140],"more":[142],"portable":[143],"code":[144],"GPU.":[147],"compare":[149],"execution":[151],"time":[152],"two":[154],"Sparse":[155],"Matrix":[156],"Vector":[157],"Multiply":[158],"implementations":[159],"show":[161,196],"able":[165],"run":[167],"simple,":[169],"version":[171,178],"within":[172],"4%":[173],"optimized":[177],"has":[180],"been":[181],"rewritten":[182],"make":[184],"use":[185],"on-chip":[188],"scratchpad":[189],"have":[191],"less":[192],"divergence.":[194],"achieves":[199],"harmonic":[201],"mean":[202],"26%":[203],"improvement":[205],"over":[206],"diverse":[212],"selection":[213],"highly":[215],"cache-sensitive":[216],"applications,":[217],"with":[218],"minimal":[219],"additional":[220],"hardware.":[221]},"counts_by_year":[{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":7},{"year":2021,"cited_by_count":9},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":11},{"year":2018,"cited_by_count":16},{"year":2017,"cited_by_count":30},{"year":2016,"cited_by_count":20},{"year":2015,"cited_by_count":29},{"year":2014,"cited_by_count":8}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
