{"id":"https://openalex.org/W2156831150","doi":"https://doi.org/10.1145/1815961.1815992","title":"Dynamic warp subdivision for integrated branch and memory divergence tolerance","display_name":"Dynamic warp subdivision for integrated branch and memory divergence tolerance","publication_year":2010,"publication_date":"2010-06-19","ids":{"openalex":"https://openalex.org/W2156831150","doi":"https://doi.org/10.1145/1815961.1815992","mag":"2156831150"},"language":"en","primary_location":{"id":"doi:10.1145/1815961.1815992","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1815961.1815992","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 37th annual international symposium on Computer architecture","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018781096","display_name":"Jiayuan Meng","orcid":"https://orcid.org/0000-0002-1135-6518"},"institutions":[{"id":"https://openalex.org/I51556381","display_name":"University of Virginia","ror":"https://ror.org/0153tk833","country_code":"US","type":"education","lineage":["https://openalex.org/I51556381"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jiayuan Meng","raw_affiliation_strings":["University of Virginia, Charlottesville, VA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Virginia, Charlottesville, VA, USA","institution_ids":["https://openalex.org/I51556381"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061578459","display_name":"David Tarjan","orcid":null},"institutions":[{"id":"https://openalex.org/I51556381","display_name":"University of Virginia","ror":"https://ror.org/0153tk833","country_code":"US","type":"education","lineage":["https://openalex.org/I51556381"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David Tarjan","raw_affiliation_strings":["University of Virginia, Charlottesville, VA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Virginia, Charlottesville, VA, USA","institution_ids":["https://openalex.org/I51556381"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074818897","display_name":"Kevin Skadron","orcid":"https://orcid.org/0000-0002-8091-9302"},"institutions":[{"id":"https://openalex.org/I51556381","display_name":"University of Virginia","ror":"https://ror.org/0153tk833","country_code":"US","type":"education","lineage":["https://openalex.org/I51556381"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kevin Skadron","raw_affiliation_strings":["University of Virginia, Charlottesville, VA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Virginia, Charlottesville, VA, USA","institution_ids":["https://openalex.org/I51556381"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":20.2417,"has_fulltext":false,"cited_by_count":257,"citation_normalized_percentile":{"value":0.99581784,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"235","last_page":"246"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/subdivision","display_name":"Subdivision","score":0.7657239437103271},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6267359256744385},{"id":"https://openalex.org/keywords/divergence","display_name":"Divergence (linguistics)","score":0.5839729905128479},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5238211750984192},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07438257336616516}],"concepts":[{"id":"https://openalex.org/C143392562","wikidata":"https://www.wikidata.org/wiki/Q449111","display_name":"Subdivision","level":2,"score":0.7657239437103271},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6267359256744385},{"id":"https://openalex.org/C207390915","wikidata":"https://www.wikidata.org/wiki/Q1230525","display_name":"Divergence (linguistics)","level":2,"score":0.5839729905128479},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5238211750984192},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07438257336616516},{"id":"https://openalex.org/C147176958","wikidata":"https://www.wikidata.org/wiki/Q77590","display_name":"Civil engineering","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1815961.1815992","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1815961.1815992","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 37th annual international symposium on Computer architecture","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W1525740151","https://openalex.org/W1567324076","https://openalex.org/W1981319267","https://openalex.org/W2012252449","https://openalex.org/W2037743346","https://openalex.org/W2044206819","https://openalex.org/W2097909406","https://openalex.org/W2105012172","https://openalex.org/W2106625514","https://openalex.org/W2107978915","https://openalex.org/W2108977887","https://openalex.org/W2120692212","https://openalex.org/W2120964511","https://openalex.org/W2128022558","https://openalex.org/W2132587889","https://openalex.org/W2144481293","https://openalex.org/W2145021036","https://openalex.org/W2145866640","https://openalex.org/W2146246439","https://openalex.org/W2148041475","https://openalex.org/W2151982028","https://openalex.org/W2164333604","https://openalex.org/W2168452045","https://openalex.org/W2169150396","https://openalex.org/W2169880332","https://openalex.org/W2170879098","https://openalex.org/W2321262893","https://openalex.org/W2535359146","https://openalex.org/W3144376511","https://openalex.org/W4252076096","https://openalex.org/W6653239367","https://openalex.org/W6828640377"],"related_works":["https://openalex.org/W2117014006","https://openalex.org/W4233815414","https://openalex.org/W2372170743","https://openalex.org/W1558545464","https://openalex.org/W1984303163","https://openalex.org/W1509211761","https://openalex.org/W2358725432","https://openalex.org/W3047022145","https://openalex.org/W99847340","https://openalex.org/W1987204888"],"abstract_inverted_index":{"SIMD":[0,86,112],"organizations":[1],"amortize":[2],"the":[3,64,77,80,121,146,186],"area":[4,25,204],"and":[5,10,26,88,119,165,179,197],"power":[6,27],"of":[7,36,66,79,206],"fetch,":[8],"decode,":[9],"issue":[11],"logic":[12],"across":[13],"multiple":[14,67],"processing":[15],"units":[16],"in":[17,39,145],"order":[18],"to":[19,46,114,139,161,170],"maximize":[20],"throughput":[21,30],"for":[22,123],"a":[23,34,136,189,198],"given":[24],"budget.":[28],"However,":[29],"is":[31,175],"reduced":[32],"when":[33],"set":[35],"threads":[37,105,167],"operating":[38],"lockstep":[40],"(a":[41],"warp)":[42],"are":[43,55,107],"stalled":[44,110],"due":[45],"long":[47],"latency":[48,97,177],"memory":[49,180],"accesses.":[50],"The":[51,173],"resulting":[52],"idle":[53,117],"cycles":[54,118],"extremely":[56],"costly.":[57],"Multi-threading":[58],"can":[59,91],"hide":[60],"latencies":[61],"by":[62,111,222],"interleaving":[63],"execution":[65],"warps,":[68],"but":[69,109],"deep":[70],"multi-threading":[71,124],"using":[72],"many":[73],"warps":[74],"dramatically":[75],"increases":[76],"cost":[78],"register":[81,151],"files":[82],"(multi-threading":[83],"depth":[84],"x":[85],"width),":[87],"cache":[89,191],"contention":[90],"make":[92],"performance":[93,219],"worse.":[94],"Instead,":[95],"intra-warp":[96],"hiding":[98,178],"should":[99],"first":[100],"be":[101],"exploited.":[102],"This":[103,127],"allows":[104,135],"that":[106,168],"ready":[108],"restrictions":[113],"use":[115],"these":[116],"reduces":[120],"need":[122],"among":[125],"warps.":[126],"paper":[128],"introduces":[129],"dynamic":[130],"warp":[131,138],"subdivision":[132],"(DWS),":[133],"which":[134],"single":[137],"occupy":[140],"more":[141],"than":[142,208],"one":[143],"slot":[144],"scheduler":[147],"without":[148],"requiring":[149],"extra":[150],"file":[152],"space.":[153],"Independent":[154],"scheduling":[155],"entities":[156],"allow":[157,166],"divergent":[158],"branch":[159],"paths":[160],"interleave":[162],"their":[163],"execution,":[164],"hit":[169],"run":[171],"ahead.":[172],"result":[174],"improved":[176],"level":[181],"parallelism":[182],"(MLP).":[183],"We":[184],"evaluate":[185],"technique":[187,217],"on":[188,220],"coherent":[190],"hierarchy":[192],"with":[193,211],"private":[194],"L1":[195],"caches":[196],"shared":[199],"L2":[200],"cache.":[201],"With":[202],"an":[203],"overhead":[205],"less":[207],"1%,":[209],"experiments":[210],"eight":[212],"data-parallel":[213],"benchmarks":[214],"show":[215],"our":[216],"improves":[218],"average":[221],"1.7X.":[223]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":7},{"year":2020,"cited_by_count":7},{"year":2019,"cited_by_count":12},{"year":2018,"cited_by_count":25},{"year":2017,"cited_by_count":17},{"year":2016,"cited_by_count":15},{"year":2015,"cited_by_count":36},{"year":2014,"cited_by_count":34},{"year":2013,"cited_by_count":36},{"year":2012,"cited_by_count":26}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
