{"id":"https://openalex.org/W4245867598","doi":"https://doi.org/10.1109/pact.2013.6618813","title":"Reshaping cache misses to improve row-buffer locality in multicore systems","display_name":"Reshaping cache misses to improve row-buffer locality in multicore systems","publication_year":2013,"publication_date":"2013-10-01","ids":{"openalex":"https://openalex.org/W4245867598","doi":"https://doi.org/10.1109/pact.2013.6618813"},"language":"en","primary_location":{"id":"doi:10.1109/pact.2013.6618813","is_oa":false,"landing_page_url":"https://doi.org/10.1109/pact.2013.6618813","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 22nd International Conference on Parallel Architectures and Compilation Techniques","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5064126781","display_name":"Onur Kay\u0131ran","orcid":"https://orcid.org/0009-0006-4482-3115"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Onur Kayiran","raw_affiliation_strings":["Department of Computer Science and Engineering, Pennsylvania State University, University Park, PA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Pennsylvania State University, University Park, PA, USA","institution_ids":["https://openalex.org/I130769515"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050350124","display_name":"Adwait Jog","orcid":"https://orcid.org/0000-0002-5525-7204"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Adwait Jog","raw_affiliation_strings":["Department of Computer Science and Engineering, Pennsylvania State University, University Park, PA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Pennsylvania State University, University Park, PA, USA","institution_ids":["https://openalex.org/I130769515"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007116603","display_name":"Mahmut Kandemir","orcid":"https://orcid.org/0000-0002-9940-9951"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mahmut T. Kandemir","raw_affiliation_strings":["Department of Computer Science and Engineering, Pennsylvania State University, University Park, PA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Pennsylvania State University, University Park, PA, USA","institution_ids":["https://openalex.org/I130769515"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5054027488","display_name":"Chita R. Das","orcid":"https://orcid.org/0000-0002-4746-7578"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chita R. Das","raw_affiliation_strings":["Department of Computer Science and Engineering, Pennsylvania State University, University Park, PA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Pennsylvania State University, University Park, PA, USA","institution_ids":["https://openalex.org/I130769515"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5064126781"],"corresponding_institution_ids":["https://openalex.org/I130769515"],"apc_list":null,"apc_paid":null,"fwci":1.8913,"has_fulltext":false,"cited_by_count":19,"citation_normalized_percentile":{"value":0.87019016,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8834314346313477},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.7013123035430908},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6700027585029602},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.6441026926040649},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.6062275767326355},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.5804398059844971},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.5787774324417114},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.560826301574707},{"id":"https://openalex.org/keywords/cache-coherence","display_name":"Cache coherence","score":0.4411298334598541},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.43679094314575195},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.4359927475452423},{"id":"https://openalex.org/keywords/shared-memory","display_name":"Shared memory","score":0.4286661446094513},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3698385953903198},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.36791855096817017},{"id":"https://openalex.org/keywords/cpu-cache","display_name":"CPU cache","score":0.35593974590301514},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.3209967613220215},{"id":"https://openalex.org/keywords/cache-algorithms","display_name":"Cache algorithms","score":0.21692803502082825}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8834314346313477},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.7013123035430908},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6700027585029602},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.6441026926040649},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.6062275767326355},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.5804398059844971},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5787774324417114},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.560826301574707},{"id":"https://openalex.org/C141917322","wikidata":"https://www.wikidata.org/wiki/Q1025017","display_name":"Cache coherence","level":5,"score":0.4411298334598541},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.43679094314575195},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4359927475452423},{"id":"https://openalex.org/C133875982","wikidata":"https://www.wikidata.org/wiki/Q764810","display_name":"Shared memory","level":2,"score":0.4286661446094513},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3698385953903198},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.36791855096817017},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.35593974590301514},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.3209967613220215},{"id":"https://openalex.org/C38556500","wikidata":"https://www.wikidata.org/wiki/Q13404475","display_name":"Cache algorithms","level":4,"score":0.21692803502082825},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/pact.2013.6618813","is_oa":false,"landing_page_url":"https://doi.org/10.1109/pact.2013.6618813","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 22nd International Conference on Parallel Architectures and Compilation Techniques","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Decent work and economic growth","score":0.4699999988079071,"id":"https://metadata.un.org/sdg/8"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W1979527452","https://openalex.org/W1979866689","https://openalex.org/W1983235612","https://openalex.org/W2020572638","https://openalex.org/W2024122052","https://openalex.org/W2047060659","https://openalex.org/W2048118645","https://openalex.org/W2061179133","https://openalex.org/W2080592089","https://openalex.org/W2090584832","https://openalex.org/W2096661534","https://openalex.org/W2101209730","https://openalex.org/W2107333973","https://openalex.org/W2110195531","https://openalex.org/W2129817042","https://openalex.org/W2130820665","https://openalex.org/W2131312592","https://openalex.org/W2141579716","https://openalex.org/W2142444503","https://openalex.org/W2148443481","https://openalex.org/W2150851481","https://openalex.org/W2155503253","https://openalex.org/W2155568054","https://openalex.org/W2160428323","https://openalex.org/W2166918318","https://openalex.org/W2167334577","https://openalex.org/W2170634604","https://openalex.org/W3213084269","https://openalex.org/W6679359031","https://openalex.org/W6680965528"],"related_works":["https://openalex.org/W1963859303","https://openalex.org/W2364044215","https://openalex.org/W2389600408","https://openalex.org/W240129890","https://openalex.org/W3048701459","https://openalex.org/W2149078538","https://openalex.org/W2080146221","https://openalex.org/W2370314112","https://openalex.org/W1912958759","https://openalex.org/W2792081825"],"abstract_inverted_index":{"General-purpose":[0],"graphics":[1],"processing":[2],"units":[3],"(GPG-PUs)":[4],"are":[5,55],"at":[6,146],"their":[7],"best":[8],"in":[9,43,63,95,136,190,220],"accelerating":[10],"computation":[11],"by":[12,19,82,89,166],"exploiting":[13,90],"abundant":[14],"thread-level":[15],"parallelism":[16],"(TLP)":[17],"offered":[18],"many":[20],"classes":[21],"of":[22,45,57,104,121,170],"HPC":[23],"applications.":[24],"To":[25,149,176],"facilitate":[26],"such":[27],"high":[28,188,198],"TLP,":[29],"emerging":[30],"programming":[31],"models":[32],"like":[33],"CUDA":[34],"and":[35,59,133,140],"OpenCL":[36],"allow":[37],"programmers":[38],"to":[39,86,130,143,195,223],"create":[40,134],"work":[41,47],"abstractions":[42],"terms":[44],"smaller":[46],"units,":[48],"called":[49,160],"cooperative":[50],"thread":[51],"arrays":[52],"(CTAs).":[53],"CTAs":[54,79,105,183],"groups":[56],"threads":[58,124],"can":[60],"be":[61,131],"executed":[62],"any":[64],"order,":[65],"thereby":[66],"providing":[67],"ample":[68],"opportunities":[69],"for":[70,184],"TLP.":[71,91],"The":[72],"state-of-the-art":[73],"GPGPU":[74,205],"schedulers":[75],"allocate":[76],"maximum":[77,101],"possible":[78,102],"per-core":[80],"(limited":[81],"available":[83],"on-chip":[84],"resources)":[85],"enhance":[87],"performance":[88,117,221],"However,":[92],"we":[93,153],"demonstrate":[94],"this":[96],"paper":[97],"that":[98,211],"executing":[99,123],"the":[100,112,116,137,147,164,191,212,224],"number":[103,120,169],"on":[106,173,202],"a":[107,155,203],"core":[108],"is":[109],"not":[110],"always":[111],"optimal":[113,168],"choice":[114],"from":[115,187],"perspective.":[118],"High":[119],"concurrently":[122],"might":[125],"cause":[126],"more":[127],"memory":[128,192],"requests":[129],"issued,":[132],"contention":[135,189],"caches,":[138],"network":[139],"memory,":[141],"leading":[142],"long":[144],"stalls":[145],"cores.":[148],"reduce":[150],"resource":[151,178],"contention,":[152,179],"propose":[154],"dynamic":[156],"CTA":[157,214,226],"scheduling":[158],"mechanism,":[159],"DYNCTA,":[161],"which":[162],"modulates":[163],"TLP":[165],"allocating":[167],"CTAs,":[171],"based":[172],"application":[174],"characteristics.":[175],"minimize":[177],"DYNCTA":[180],"allocates":[181],"fewer":[182],"applications":[185,196,209],"suffering":[186],"subsystem,":[193],"compared":[194,222],"demonstrating":[197],"throughput.":[199],"Simulation":[200],"results":[201],"30-core":[204],"platform":[206],"with":[207],"31":[208],"show":[210],"proposed":[213],"scheduler":[215],"provides":[216],"28%":[217],"average":[218],"improvement":[219],"existing":[225],"scheduler.":[227]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2015,"cited_by_count":3},{"year":2014,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
