{"id":"https://openalex.org/W4413286977","doi":"https://doi.org/10.1145/3760782","title":"A Low-latency On-chip Cache Hierarchy for Load-to-use Stall Reduction in GPUs","display_name":"A Low-latency On-chip Cache Hierarchy for Load-to-use Stall Reduction in GPUs","publication_year":2025,"publication_date":"2025-08-18","ids":{"openalex":"https://openalex.org/W4413286977","doi":"https://doi.org/10.1145/3760782"},"language":"en","primary_location":{"id":"doi:10.1145/3760782","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3760782","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3760782","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3760782","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5053374807","display_name":"Negin Mahani","orcid":"https://orcid.org/0000-0001-5232-3539"},"institutions":[{"id":"https://openalex.org/I115566878","display_name":"Shahid Bahonar University of Kerman","ror":"https://ror.org/04zn42r77","country_code":"IR","type":"education","lineage":["https://openalex.org/I115566878"]},{"id":"https://openalex.org/I133529467","display_name":"Sharif University of Technology","ror":"https://ror.org/024c2fq17","country_code":"IR","type":"education","lineage":["https://openalex.org/I133529467"]},{"id":"https://openalex.org/I2799803557","display_name":"Barcelona Supercomputing Center","ror":"https://ror.org/05sd8tv96","country_code":"ES","type":"facility","lineage":["https://openalex.org/I2799803557","https://openalex.org/I9617848"]},{"id":"https://openalex.org/I9617848","display_name":"Universitat Polit\u00e8cnica de Catalunya","ror":"https://ror.org/03mb6wj31","country_code":"ES","type":"education","lineage":["https://openalex.org/I9617848"]}],"countries":["ES","IR"],"is_corresponding":false,"raw_author_name":"Negin (Sadat) (Nematollahi zadeh) Mahani","raw_affiliation_strings":["Computer Science, Barcelona Supercomputing Center","Shahid Bahonar University of Kerman","Sharif University of Technology","Shahid Bahonar University of Kerman, Kerman, Iran (the Islamic Republic of)","Computer Science, Barcelona Supercomputing Center, Barcelona, Spain","Sharif University of Technology, Tehran, Iran (the Islamic Republic of)"],"raw_orcid":"https://orcid.org/0000-0001-5232-3539","affiliations":[{"raw_affiliation_string":"Computer Science, Barcelona Supercomputing Center","institution_ids":["https://openalex.org/I9617848","https://openalex.org/I2799803557"]},{"raw_affiliation_string":"Shahid Bahonar University of Kerman","institution_ids":["https://openalex.org/I115566878"]},{"raw_affiliation_string":"Sharif University of Technology","institution_ids":["https://openalex.org/I133529467"]},{"raw_affiliation_string":"Shahid Bahonar University of Kerman, Kerman, Iran (the Islamic Republic of)","institution_ids":["https://openalex.org/I115566878"]},{"raw_affiliation_string":"Computer Science, Barcelona Supercomputing Center, Barcelona, Spain","institution_ids":["https://openalex.org/I9617848","https://openalex.org/I2799803557"]},{"raw_affiliation_string":"Sharif University of Technology, Tehran, Iran (the Islamic Republic of)","institution_ids":["https://openalex.org/I133529467"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019048190","display_name":"Hajar Falahati","orcid":"https://orcid.org/0000-0001-8375-3339"},"institutions":[{"id":"https://openalex.org/I2799803557","display_name":"Barcelona Supercomputing Center","ror":"https://ror.org/05sd8tv96","country_code":"ES","type":"facility","lineage":["https://openalex.org/I2799803557","https://openalex.org/I9617848"]},{"id":"https://openalex.org/I9617848","display_name":"Universitat Polit\u00e8cnica de Catalunya","ror":"https://ror.org/03mb6wj31","country_code":"ES","type":"education","lineage":["https://openalex.org/I9617848"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Hajar Falahati","raw_affiliation_strings":["Barcelona Supercomputing Center","Barcelona Supercomputing Center, Barcelona, Spain"],"raw_orcid":"https://orcid.org/0000-0001-8375-3339","affiliations":[{"raw_affiliation_string":"Barcelona Supercomputing Center","institution_ids":["https://openalex.org/I9617848","https://openalex.org/I2799803557"]},{"raw_affiliation_string":"Barcelona Supercomputing Center, Barcelona, Spain","institution_ids":["https://openalex.org/I9617848","https://openalex.org/I2799803557"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072312467","display_name":"Sina Darabi","orcid":"https://orcid.org/0000-0002-7082-123X"},"institutions":[{"id":"https://openalex.org/I57201433","display_name":"Universit\u00e0 della Svizzera italiana","ror":"https://ror.org/03c4atk17","country_code":"CH","type":"education","lineage":["https://openalex.org/I57201433"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Sina Darabi","raw_affiliation_strings":["Universit\u00e0 della Svizzera italiana"],"raw_orcid":"https://orcid.org/0000-0002-7082-123X","affiliations":[{"raw_affiliation_string":"Universit\u00e0 della Svizzera italiana","institution_ids":["https://openalex.org/I57201433"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119334424","display_name":"Ahmad Javadi-Nezhad","orcid":null},"institutions":[{"id":"https://openalex.org/I133529467","display_name":"Sharif University of Technology","ror":"https://ror.org/024c2fq17","country_code":"IR","type":"education","lineage":["https://openalex.org/I133529467"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"Ahmad Javadi-Nezhad","raw_affiliation_strings":["Sharif University of Technology","Sharif University of Technology, Tehran, Iran (the Islamic Republic of)"],"raw_orcid":"https://orcid.org/0009-0004-4444-150X","affiliations":[{"raw_affiliation_string":"Sharif University of Technology","institution_ids":["https://openalex.org/I133529467"]},{"raw_affiliation_string":"Sharif University of Technology, Tehran, Iran (the Islamic Republic of)","institution_ids":["https://openalex.org/I133529467"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015526090","display_name":"Yunho Oh","orcid":"https://orcid.org/0000-0001-6442-3705"},"institutions":[{"id":"https://openalex.org/I197347611","display_name":"Korea University","ror":"https://ror.org/047dqcg40","country_code":"KR","type":"education","lineage":["https://openalex.org/I197347611"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Yunho Oh","raw_affiliation_strings":["School of Electrical Engineering, Korea University","School of Electrical Engineering, Korea University, Seoul, Korea (the Republic of)"],"raw_orcid":"https://orcid.org/0000-0001-6442-3705","affiliations":[{"raw_affiliation_string":"School of Electrical Engineering, Korea University","institution_ids":["https://openalex.org/I197347611"]},{"raw_affiliation_string":"School of Electrical Engineering, Korea University, Seoul, Korea (the Republic of)","institution_ids":["https://openalex.org/I197347611"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008240365","display_name":"Mohammad Sadrosadati","orcid":"https://orcid.org/0000-0002-4029-0175"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]},{"id":"https://openalex.org/I4210110659","display_name":"Z\u00fcrcher Fachhochschule","ror":"https://ror.org/01sxmzj91","country_code":"CH","type":"education","lineage":["https://openalex.org/I4210110659"]},{"id":"https://openalex.org/I4210155191","display_name":"Dialog Ethik","ror":"https://ror.org/05cbxy397","country_code":"CH","type":"healthcare","lineage":["https://openalex.org/I4210155191"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Mohammad Sadrosadati","raw_affiliation_strings":["ETH","ETH, Zurikh Switzerland"],"raw_orcid":"https://orcid.org/0000-0002-4029-0175","affiliations":[{"raw_affiliation_string":"ETH","institution_ids":[]},{"raw_affiliation_string":"ETH, Zurikh Switzerland","institution_ids":["https://openalex.org/I4210110659","https://openalex.org/I35440088","https://openalex.org/I4210155191"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040238844","display_name":"Hamid Sarbazi\u2010Azad","orcid":"https://orcid.org/0000-0003-4079-8603"},"institutions":[{"id":"https://openalex.org/I133529467","display_name":"Sharif University of Technology","ror":"https://ror.org/024c2fq17","country_code":"IR","type":"education","lineage":["https://openalex.org/I133529467"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"Hamid Sarbazi-Azad","raw_affiliation_strings":["Electrical & Computer Engineering, Sharif University of Technology","IPM","Electrical & Computer Engineering, Sharif University of Technology, Tehran, Iran (the Islamic Republic of)","IPM, Tehran, Iran (the Islamic Republic of)"],"raw_orcid":"https://orcid.org/0000-0003-4079-8603","affiliations":[{"raw_affiliation_string":"Electrical & Computer Engineering, Sharif University of Technology","institution_ids":["https://openalex.org/I133529467"]},{"raw_affiliation_string":"IPM","institution_ids":[]},{"raw_affiliation_string":"Electrical & Computer Engineering, Sharif University of Technology, Tehran, Iran (the Islamic Republic of)","institution_ids":["https://openalex.org/I133529467"]},{"raw_affiliation_string":"IPM, Tehran, Iran (the Islamic Republic of)","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057697787","display_name":"Babak Falsafi","orcid":"https://orcid.org/0000-0001-5916-8068"},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Babak Falsafi","raw_affiliation_strings":["EPFL","EPFL, Lausanne Switzerland"],"raw_orcid":"https://orcid.org/0000-0001-5916-8068","affiliations":[{"raw_affiliation_string":"EPFL","institution_ids":[]},{"raw_affiliation_string":"EPFL, Lausanne Switzerland","institution_ids":["https://openalex.org/I5124864"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.19196746,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"22","issue":"3","first_page":"1","last_page":"27"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9930999875068665,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8267629146575928},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6488373279571533},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.551466166973114},{"id":"https://openalex.org/keywords/memory-hierarchy","display_name":"Memory hierarchy","score":0.5412376523017883},{"id":"https://openalex.org/keywords/stall","display_name":"Stall (fluid mechanics)","score":0.5029739737510681},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.47490039467811584},{"id":"https://openalex.org/keywords/chip","display_name":"Chip","score":0.4611753225326538},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.4468100965023041},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.39386826753616333}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8267629146575928},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6488373279571533},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.551466166973114},{"id":"https://openalex.org/C2778100165","wikidata":"https://www.wikidata.org/wiki/Q1589327","display_name":"Memory hierarchy","level":3,"score":0.5412376523017883},{"id":"https://openalex.org/C5804382","wikidata":"https://www.wikidata.org/wiki/Q752034","display_name":"Stall (fluid mechanics)","level":2,"score":0.5029739737510681},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.47490039467811584},{"id":"https://openalex.org/C165005293","wikidata":"https://www.wikidata.org/wiki/Q1074500","display_name":"Chip","level":2,"score":0.4611753225326538},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.4468100965023041},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.39386826753616333},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C57879066","wikidata":"https://www.wikidata.org/wiki/Q41217","display_name":"Mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3760782","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3760782","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3760782","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},{"id":"pmh:oai:upcommons.upc.edu:2117/440588","is_oa":true,"landing_page_url":"https://hdl.handle.net/2117/440588","pdf_url":null,"source":{"id":"https://openalex.org/S4377196262","display_name":"UPCommons institutional repository (Universitat Polit\u00e8cnica de Catalunya)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I9617848","host_organization_name":"Universitat Polit\u00e8cnica de Catalunya","host_organization_lineage":["https://openalex.org/I9617848"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/article"},{"id":"pmh:oai:infoscience.epfl.ch:20.500.14299/253230","is_oa":true,"landing_page_url":"https://infoscience.epfl.ch/handle/20.500.14299/253230","pdf_url":null,"source":{"id":"https://openalex.org/S4306400487","display_name":"Infoscience (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"research article"}],"best_oa_location":{"id":"doi:10.1145/3760782","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3760782","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3760782","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7","score":0.9100000262260437}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4413286977.pdf","grobid_xml":"https://content.openalex.org/works/W4413286977.grobid-xml"},"referenced_works_count":53,"referenced_works":["https://openalex.org/W1635512741","https://openalex.org/W1983235612","https://openalex.org/W1997162567","https://openalex.org/W1997352364","https://openalex.org/W2051968856","https://openalex.org/W2062527253","https://openalex.org/W2067441262","https://openalex.org/W2080592089","https://openalex.org/W2083221501","https://openalex.org/W2084110734","https://openalex.org/W2098505406","https://openalex.org/W2108598243","https://openalex.org/W2111096793","https://openalex.org/W2128120785","https://openalex.org/W2149234156","https://openalex.org/W2169880332","https://openalex.org/W2236227338","https://openalex.org/W2273440736","https://openalex.org/W2319071579","https://openalex.org/W2537959404","https://openalex.org/W2588191434","https://openalex.org/W2725159389","https://openalex.org/W2886858179","https://openalex.org/W2887760990","https://openalex.org/W2895553128","https://openalex.org/W2903193068","https://openalex.org/W2903738101","https://openalex.org/W2948904100","https://openalex.org/W2950959891","https://openalex.org/W2950969115","https://openalex.org/W2951631461","https://openalex.org/W2963615322","https://openalex.org/W2980229124","https://openalex.org/W3032902986","https://openalex.org/W3046181883","https://openalex.org/W3102510044","https://openalex.org/W3117273852","https://openalex.org/W3126386565","https://openalex.org/W3157126516","https://openalex.org/W3176287187","https://openalex.org/W3187908937","https://openalex.org/W4220807878","https://openalex.org/W4221109973","https://openalex.org/W4226062505","https://openalex.org/W4232019702","https://openalex.org/W4234041207","https://openalex.org/W4243704263","https://openalex.org/W4300171661","https://openalex.org/W4307101487","https://openalex.org/W4312053794","https://openalex.org/W4327694885","https://openalex.org/W4389476360","https://openalex.org/W4392450754"],"related_works":["https://openalex.org/W2335065534","https://openalex.org/W566759681","https://openalex.org/W2112389123","https://openalex.org/W1525610478","https://openalex.org/W2030662055","https://openalex.org/W3004750080","https://openalex.org/W404320512","https://openalex.org/W1997328713","https://openalex.org/W197860597","https://openalex.org/W2330277522"],"abstract_inverted_index":{"Memory":[0],"hierarchy":[1,114],"in":[2,138,186],"Graphics":[3],"Processing":[4],"Units":[5],"(GPUs)":[6],"is":[7,130],"conventionally":[8],"designed":[9],"to":[10,25,97,179],"provide":[11],"high":[12,23,219],"bandwidth":[13],"rather":[14],"than":[15,60,90],"low":[16,49,102],"latency.":[17,162],"In":[18],"particular,":[19],"because":[20],"of":[21,57,70,148,169,183],"the":[22,29,144,181,196,205,215],"tolerance":[24],"load-to-use":[26,88,161,220],"latency":[27,89,123],"(i.e.,":[28],"time":[30],"that":[31,53,84,91,115,142,191],"warps":[32],"wait":[33],"for":[34,45,214],"data":[35],"fetched":[36],"by":[37,93,199,209],"memory":[38],"loads),":[39],"GPU":[40,140],"L1D":[41,119],"caches":[42],"are":[43,54,66],"optimized":[44],"density,":[46],"capacity,":[47],"and":[48,79,101,125,158,193,201,203,211,225],"power":[50,226],"with":[51,121,222],"latencies":[52],"often":[55],"orders":[56],"magnitude":[58],"longer":[59],"conventional":[61,153],"CPU":[62],"caches.":[63],"However,":[64],"there":[65],"many":[67],"important":[68],"classes":[69],"data-parallel":[71,149],"applications":[72,216],"(e.g.,":[73],"graph,":[74],"tree,":[75],"priority":[76],"queue":[77],"processing,":[78],"sparse":[80],"deep":[81],"learning":[82],"applications)":[83],"benefit":[85],"from":[86,218],"lower":[87],"offered":[92],"modern":[94],"GPUs":[95],"due":[96],"their":[98],"inherent":[99],"divergence":[100],"effective":[103],"Thread-Level":[104],"Parallelism":[105],"(TLP).":[106],"This":[107],"article":[108],"introduces":[109],"an":[110,166],"innovative":[111],"on-chip":[112],"cache":[113,120,136],"incorporates":[116],"a":[117,131,175],"decoupled":[118],"reduced":[122],"(LoTUS)":[124],"its":[126],"management":[127],"scheme.":[128],"LoTUS":[129,192],"minimally":[132],"sized":[133],"fully":[134],"associative":[135],"placed":[137],"each":[139],"subcore":[141],"captures":[143],"primary":[145],"working":[146],"set":[147],"applications.":[150],"It":[151],"exploits":[152],"high-performance":[154],"low-density":[155],"SRAM":[156],"cells":[157],"dramatically":[159],"reduces":[160],"We":[163],"also":[164],"propose":[165],"intelligent":[167],"extension":[168],"LoTUS,":[170],"called":[171],"LoTUSage,":[172],"which":[173],"employs":[174],"lightweight":[176],"learning-based":[177],"model":[178],"predict":[180],"utility":[182],"caching":[184],"requests":[185],"LoTUS.":[187],"Evaluation":[188],"results":[189],"show":[190],"LoTUSage":[194],"improve":[195],"average":[197,206],"performance":[198],"23.9%":[200],"35.4%":[202],"reduce":[204],"energy":[207],"consumption":[208],"27.8%":[210],"38.5%,":[212],"respectively,":[213],"suffering":[217],"stalls":[221],"negligible":[223],"area":[224],"overheads.":[227]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
