{"id":"https://openalex.org/W2018150881","doi":"https://doi.org/10.1145/2166879.2166882","title":"A Hierarchical Thread Scheduler and Register File for Energy-Efficient Throughput Processors","display_name":"A Hierarchical Thread Scheduler and Register File for Energy-Efficient Throughput Processors","publication_year":2012,"publication_date":"2012-04-01","ids":{"openalex":"https://openalex.org/W2018150881","doi":"https://doi.org/10.1145/2166879.2166882","mag":"2018150881"},"language":"en","primary_location":{"id":"doi:10.1145/2166879.2166882","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2166879.2166882","pdf_url":null,"source":{"id":"https://openalex.org/S193109227","display_name":"ACM Transactions on Computer Systems","issn_l":"0734-2071","issn":["0734-2071","1557-7333"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Computer Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003674142","display_name":"Mark Gebhart","orcid":null},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Mark Gebhart","raw_affiliation_strings":["The University of Texas at Austin","The University of Texas, at Austin"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The University of Texas at Austin","institution_ids":["https://openalex.org/I86519309"]},{"raw_affiliation_string":"The University of Texas, at Austin","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100729407","display_name":"Daniel Johnson","orcid":"https://orcid.org/0000-0002-1188-8610"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Daniel R. Johnson","raw_affiliation_strings":["University of Illinois at Urbana-Champaign","University of Illinois at Urbana Champaign"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana-Champaign","institution_ids":["https://openalex.org/I157725225"]},{"raw_affiliation_string":"University of Illinois at Urbana Champaign","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061578459","display_name":"David Tarjan","orcid":null},"institutions":[{"id":"https://openalex.org/I1304085615","display_name":"Nvidia (United Kingdom)","ror":"https://ror.org/02kr42612","country_code":"GB","type":"company","lineage":["https://openalex.org/I1304085615","https://openalex.org/I4210127875"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"David Tarjan","raw_affiliation_strings":["NVIDIA","Nvidia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]},{"raw_affiliation_string":"Nvidia","institution_ids":["https://openalex.org/I1304085615"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112796474","display_name":"Stephen W. Keckler","orcid":null},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Stephen W. Keckler","raw_affiliation_strings":["NVIDIA and The University of Texas at Austin","NVIDIA and The University of Texas at Austin#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NVIDIA and The University of Texas at Austin","institution_ids":["https://openalex.org/I86519309"]},{"raw_affiliation_string":"NVIDIA and The University of Texas at Austin#TAB#","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084342236","display_name":"William J. Dally","orcid":"https://orcid.org/0000-0003-4632-2876"},"institutions":[{"id":"https://openalex.org/I1304085615","display_name":"Nvidia (United Kingdom)","ror":"https://ror.org/02kr42612","country_code":"GB","type":"company","lineage":["https://openalex.org/I1304085615","https://openalex.org/I4210127875"]},{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"William J. Dally","raw_affiliation_strings":["NVIDIA and Stanford University","NVIDIA and Stanford University#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NVIDIA and Stanford University","institution_ids":["https://openalex.org/I1304085615","https://openalex.org/I97018004"]},{"raw_affiliation_string":"NVIDIA and Stanford University#TAB#","institution_ids":["https://openalex.org/I1304085615","https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089345091","display_name":"Erik Lindholm","orcid":null},"institutions":[{"id":"https://openalex.org/I1304085615","display_name":"Nvidia (United Kingdom)","ror":"https://ror.org/02kr42612","country_code":"GB","type":"company","lineage":["https://openalex.org/I1304085615","https://openalex.org/I4210127875"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Erik Lindholm","raw_affiliation_strings":["NVIDIA","Nvidia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]},{"raw_affiliation_string":"Nvidia","institution_ids":["https://openalex.org/I1304085615"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074818897","display_name":"Kevin Skadron","orcid":"https://orcid.org/0000-0002-8091-9302"},"institutions":[{"id":"https://openalex.org/I51556381","display_name":"University of Virginia","ror":"https://ror.org/0153tk833","country_code":"US","type":"education","lineage":["https://openalex.org/I51556381"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kevin Skadron","raw_affiliation_strings":["University of Virginia","University of Virginia'"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Virginia","institution_ids":["https://openalex.org/I51556381"]},{"raw_affiliation_string":"University of Virginia'","institution_ids":["https://openalex.org/I51556381"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5003674142"],"corresponding_institution_ids":["https://openalex.org/I86519309"],"apc_list":null,"apc_paid":null,"fwci":3.5146,"has_fulltext":false,"cited_by_count":31,"citation_normalized_percentile":{"value":0.92421776,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"30","issue":"2","first_page":"1","last_page":"38"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8916707038879395},{"id":"https://openalex.org/keywords/register-file","display_name":"Register file","score":0.8234798312187195},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.7531434297561646},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5949385762214661},{"id":"https://openalex.org/keywords/memory-hierarchy","display_name":"Memory hierarchy","score":0.4948975443840027},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.47732383012771606},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.43510979413986206},{"id":"https://openalex.org/keywords/instruction-set","display_name":"Instruction set","score":0.31148087978363037},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.13861212134361267}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8916707038879395},{"id":"https://openalex.org/C117280010","wikidata":"https://www.wikidata.org/wiki/Q180944","display_name":"Register file","level":3,"score":0.8234798312187195},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.7531434297561646},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5949385762214661},{"id":"https://openalex.org/C2778100165","wikidata":"https://www.wikidata.org/wiki/Q1589327","display_name":"Memory hierarchy","level":3,"score":0.4948975443840027},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.47732383012771606},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.43510979413986206},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.31148087978363037},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.13861212134361267},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/2166879.2166882","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2166879.2166882","pdf_url":null,"source":{"id":"https://openalex.org/S193109227","display_name":"ACM Transactions on Computer Systems","issn_l":"0734-2071","issn":["0734-2071","1557-7333"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Computer Systems","raw_type":"journal-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.299.9316","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.299.9316","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cs.virginia.edu/~skadron/Papers/gebhart_tocs.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7","score":0.9100000262260437}],"awards":[{"id":"https://openalex.org/G4290976211","display_name":null,"funder_award_id":"CCF-0936700","funder_id":"https://openalex.org/F4320337387","funder_display_name":"Division of Computing and Communication Foundations"},{"id":"https://openalex.org/G734899754","display_name":null,"funder_award_id":"HR0011-10-9-0008","funder_id":"https://openalex.org/F4320332180","funder_display_name":"Defense Advanced Research Projects Agency"}],"funders":[{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"},{"id":"https://openalex.org/F4320337387","display_name":"Division of Computing and Communication Foundations","ror":"https://ror.org/01mng8331"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":50,"referenced_works":["https://openalex.org/W1979527452","https://openalex.org/W1980364632","https://openalex.org/W1985255904","https://openalex.org/W2000718530","https://openalex.org/W2029076579","https://openalex.org/W2033597569","https://openalex.org/W2057404685","https://openalex.org/W2058320534","https://openalex.org/W2062603907","https://openalex.org/W2064486455","https://openalex.org/W2072768743","https://openalex.org/W2078239989","https://openalex.org/W2080285119","https://openalex.org/W2080592089","https://openalex.org/W2080732703","https://openalex.org/W2089363288","https://openalex.org/W2091919971","https://openalex.org/W2103078980","https://openalex.org/W2108039095","https://openalex.org/W2110195531","https://openalex.org/W2110263160","https://openalex.org/W2112959014","https://openalex.org/W2120726537","https://openalex.org/W2127578285","https://openalex.org/W2128046183","https://openalex.org/W2129760904","https://openalex.org/W2130124967","https://openalex.org/W2135270610","https://openalex.org/W2137860371","https://openalex.org/W2143032080","https://openalex.org/W2144036270","https://openalex.org/W2144481293","https://openalex.org/W2147692971","https://openalex.org/W2148051985","https://openalex.org/W2148060071","https://openalex.org/W2148425700","https://openalex.org/W2152496451","https://openalex.org/W2153039279","https://openalex.org/W2155468002","https://openalex.org/W2169150396","https://openalex.org/W2169528027","https://openalex.org/W2169749035","https://openalex.org/W2169823452","https://openalex.org/W2464177207","https://openalex.org/W3150612471","https://openalex.org/W4206010581","https://openalex.org/W4247913514","https://openalex.org/W4250814284","https://openalex.org/W4253600508","https://openalex.org/W4300425614"],"related_works":["https://openalex.org/W2096854345","https://openalex.org/W2045555750","https://openalex.org/W4239584669","https://openalex.org/W788524553","https://openalex.org/W4250432526","https://openalex.org/W2101536355","https://openalex.org/W2009783759","https://openalex.org/W2168921806","https://openalex.org/W4245282684","https://openalex.org/W74994880"],"abstract_inverted_index":{"Modern":[0],"graphics":[1,218],"processing":[2],"units":[3],"(GPUs)":[4],"employ":[5],"a":[6,24,31,64,70,85,126,156,161,181,190,213,230],"large":[7,32],"number":[8,98,139,147],"of":[9,43,73,88,99,140,148,203,215,232],"hardware":[10],"threads":[11,75,90,100],"to":[12,38,76,91],"hide":[13,77,92],"both":[14,40,155],"function":[15],"unit":[16],"and":[17,45,79,84,145,160,219,239],"memory":[18,81,94],"access":[19,39,82],"latency.":[20,46,95],"Extreme":[21],"multithreading":[22],"requires":[23],"complex":[25],"thread":[26,66,187,224],"scheduler":[27,67,103,188],"as":[28,30,59],"well":[29],"register":[33,119,128,176,183,205,245,249],"file,":[34,184],"which":[35],"is":[36,167],"expensive":[37],"in":[41,142,193,199],"terms":[42],"energy":[44,54,111,194,251],"We":[47,130,153],"present":[48],"two":[49],"complementary":[50],"techniques":[51],"for":[52,134,169,208],"reducing":[53],"on":[55,122,237],"massively-threaded":[56],"processors":[57],"such":[58],"GPUs.":[60],"First,":[61],"we":[62,114],"investigate":[63],"two-level":[65,186],"that":[68,101],"maintains":[69],"small":[71],"set":[72,87],"active":[74,209,223],"ALU":[78],"local":[80],"latency":[83],"larger":[86],"pending":[89],"main":[93],"Reducing":[96],"the":[97,102,109,117,135,138,143,146,165,175,200,204,222],"must":[104],"consider":[105,154],"each":[106,151],"cycle":[107],"improves":[108],"scheduler\u2019s":[110],"efficiency.":[112],"Second,":[113],"propose":[115],"replacing":[116],"monolithic":[118],"file":[120,177,206,246,250],"found":[121],"modern":[123],"designs":[124],"with":[125,180,234],"hierarchical":[127,182],"file.":[129],"explore":[131],"various":[132],"trade-offs":[133],"hierarchy":[136,144,207,247],"including":[137],"levels":[141,202],"entries":[149,198],"at":[150],"level.":[152],"hardware-managed":[157],"caching":[158],"scheme":[159],"software-managed":[162,244],"scheme,":[163],"where":[164],"compiler":[166],"responsible":[168],"orchestrating":[170],"all":[171],"data":[172],"movement":[173],"within":[174],"hierarchy.":[178],"Combined":[179],"our":[185,240],"provides":[189],"further":[191],"reduction":[192],"by":[195,229,252],"only":[196],"allocating":[197],"upper":[201],"threads.":[210],"Averaging":[211],"across":[212],"variety":[214],"real":[216],"world":[217],"compute":[220],"workloads,":[221],"count":[225],"can":[226],"be":[227],"reduced":[228],"factor":[231],"4":[233],"minimal":[235],"impact":[236],"performance":[238],"most":[241],"efficient":[242],"three-level":[243],"reduces":[248],"54%.":[253]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":8},{"year":2016,"cited_by_count":6},{"year":2015,"cited_by_count":6},{"year":2014,"cited_by_count":5},{"year":2013,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
