{"id":"https://openalex.org/W2798966306","doi":"https://doi.org/10.1631/fitee.1700059","title":"CWLP: coordinated warp scheduling and locality-protected cache allocation on GPUs","display_name":"CWLP: coordinated warp scheduling and locality-protected cache allocation on GPUs","publication_year":2018,"publication_date":"2018-02-01","ids":{"openalex":"https://openalex.org/W2798966306","doi":"https://doi.org/10.1631/fitee.1700059","mag":"2798966306"},"language":"en","primary_location":{"id":"doi:10.1631/fitee.1700059","is_oa":false,"landing_page_url":"https://doi.org/10.1631/fitee.1700059","pdf_url":null,"source":{"id":"https://openalex.org/S4210189857","display_name":"Frontiers of Information Technology & Electronic Engineering","issn_l":"2095-9184","issn":["2095-9184","2095-9230"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers of Information Technology &amp; Electronic Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100354558","display_name":"Yang Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yang Zhang","raw_affiliation_strings":["National Laboratory for Parallel and Distributed Processing, National University of Defense Technology, Changsha, 410073, China"],"affiliations":[{"raw_affiliation_string":"National Laboratory for Parallel and Distributed Processing, National University of Defense Technology, Changsha, 410073, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100534277","display_name":"Zuocheng Xing","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zuo-cheng Xing","raw_affiliation_strings":["National Laboratory for Parallel and Distributed Processing, National University of Defense Technology, Changsha, 410073, China"],"affiliations":[{"raw_affiliation_string":"National Laboratory for Parallel and Distributed Processing, National University of Defense Technology, Changsha, 410073, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088350919","display_name":"Cang Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Cang Liu","raw_affiliation_strings":["National Laboratory for Parallel and Distributed Processing, National University of Defense Technology, Changsha, 410073, China"],"affiliations":[{"raw_affiliation_string":"National Laboratory for Parallel and Distributed Processing, National University of Defense Technology, Changsha, 410073, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5014640087","display_name":"Chuan Tang","orcid":"https://orcid.org/0000-0001-7975-0469"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chuan Tang","raw_affiliation_strings":["National Laboratory for Parallel and Distributed Processing, National University of Defense Technology, Changsha, 410073, China"],"affiliations":[{"raw_affiliation_string":"National Laboratory for Parallel and Distributed Processing, National University of Defense Technology, Changsha, 410073, China","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100354558"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":0.5264,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.610201,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"19","issue":"2","first_page":"206","last_page":"220"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.8225992321968079},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8012459874153137},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.7832240462303162},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.731242299079895},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.5809750556945801},{"id":"https://openalex.org/keywords/locality-of-reference","display_name":"Locality of reference","score":0.4351421296596527},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.32235953211784363},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.04845806956291199}],"concepts":[{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.8225992321968079},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8012459874153137},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.7832240462303162},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.731242299079895},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5809750556945801},{"id":"https://openalex.org/C27602214","wikidata":"https://www.wikidata.org/wiki/Q1868547","display_name":"Locality of reference","level":3,"score":0.4351421296596527},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.32235953211784363},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.04845806956291199},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1631/fitee.1700059","is_oa":false,"landing_page_url":"https://doi.org/10.1631/fitee.1700059","pdf_url":null,"source":{"id":"https://openalex.org/S4210189857","display_name":"Frontiers of Information Technology & Electronic Engineering","issn_l":"2095-9184","issn":["2095-9184","2095-9230"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers of Information Technology &amp; Electronic Engineering","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.9100000262260437,"id":"https://metadata.un.org/sdg/7"}],"awards":[{"id":"https://openalex.org/G3940563842","display_name":null,"funder_award_id":"61170083","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1979527452","https://openalex.org/W1982996921","https://openalex.org/W1989061323","https://openalex.org/W2026791745","https://openalex.org/W2027806965","https://openalex.org/W2033486618","https://openalex.org/W2047060659","https://openalex.org/W2053744175","https://openalex.org/W2067441262","https://openalex.org/W2072768743","https://openalex.org/W2079038734","https://openalex.org/W2080285119","https://openalex.org/W2080592089","https://openalex.org/W2081583983","https://openalex.org/W2084309410","https://openalex.org/W2090584832","https://openalex.org/W2096661534","https://openalex.org/W2098505406","https://openalex.org/W2111669132","https://openalex.org/W2139605600","https://openalex.org/W2142444503","https://openalex.org/W2151637286","https://openalex.org/W2232645663","https://openalex.org/W2586960021","https://openalex.org/W2769375938","https://openalex.org/W4237024478","https://openalex.org/W4244089596","https://openalex.org/W4245661606","https://openalex.org/W4251579344"],"related_works":["https://openalex.org/W1555349535","https://openalex.org/W2583128298","https://openalex.org/W2053359564","https://openalex.org/W2161159383","https://openalex.org/W1495260638","https://openalex.org/W1511204342","https://openalex.org/W2369125128","https://openalex.org/W2369223577","https://openalex.org/W2010020348","https://openalex.org/W1982580072"],"abstract_inverted_index":{"As":[0],"we":[1,147,190],"approach":[2],"the":[3,90,99,128,138,180,198,215,220,225,228,233,239,273],"exascale":[4],"era":[5],"in":[6,36],"supercomputing,":[7],"designing":[8],"a":[9,14,43,50,54,68,87,150,173,192,207,261],"balanced":[10],"computer":[11],"system":[12,139],"with":[13,53,219,227],"powerful":[15,62],"computing":[16,63,122],"ability":[17,123],"and":[18,94,132,141,154,167,205,252,266],"low":[19],"power":[20],"requirements":[21],"has":[22],"becoming":[23],"increasingly":[24],"important.":[25],"The":[26,80],"graphics":[27],"processing":[28],"unit":[29,211],"(GPU)":[30],"is":[31,83,236],"an":[32,244,267],"accelerator":[33],"used":[34,237],"widely":[35],"most":[37],"of":[38,46,71,119,164,201,270],"recent":[39],"supercomputers.":[40],"It":[41],"adopts":[42],"large":[44],"number":[45],"threads":[47],"to":[48,60,86,105,111,160,185,196,223,242,249,264],"hide":[49,168,253],"long":[51],"latency":[52,109],"high":[55,121],"energy":[56,142],"efficiency.":[57,143],"In":[58,144],"contrast":[59],"their":[61],"ability,":[64],"GPUs":[65,103],"have":[66],"only":[67],"few":[69],"megabytes":[70],"fast":[72],"on-chip":[73],"memory":[74],"storage":[75],"per":[76],"streaming":[77],"multiprocessor":[78],"(SM).":[79],"GPU":[81],"cache":[82,95,130,157,175,187,203,209],"inefficient":[84],"due":[85,110],"mismatch":[88],"between":[89],"throughput-oriented":[91],"execution":[92],"model":[93],"hierarchy":[96],"design.":[97],"At":[98],"same":[100],"time,":[101],"current":[102],"fail":[104],"handle":[106],"burst-mode":[107],"long-access":[108],"GPU\u2019s":[112,120],"poor":[113,129],"warp":[114,133,152,240,246],"scheduling":[115,134,153],"method.":[116],"Thus,":[117],"benefits":[118],"are":[124],"reduced":[125],"dramatically":[126],"by":[127,238],"management":[131],"methods,":[135],"which":[136,213],"limit":[137],"performance":[140],"this":[145],"paper,":[146],"put":[148],"forward":[149],"coordinated":[151],"locality-protected":[155,174],"(CWLP)":[156],"allocation":[158,176,210],"scheme":[159,248],"make":[161],"full":[162],"use":[163,191],"data":[165,216],"locality":[166,194,234,251],"latency.":[169,254],"We":[170],"first":[171],"present":[172],"method":[177],"based":[178],"on":[179],"instruction":[181],"program":[182],"counter":[183],"(LPC)":[184],"promote":[186],"performance.":[188],"Specifically,":[189],"PC-based":[193],"detector":[195],"collect":[197],"reuse":[199,217,230],"information":[200,218,222,235],"each":[202],"line":[204],"employ":[206],"prioritised":[208],"(PCAU)":[212],"coordinates":[214],"time-stamp":[221],"evict":[224],"lines":[226],"least":[229],"possibility.":[231],"Moreover,":[232],"scheduler":[241],"create":[243],"intelligent":[245],"reordering":[247],"capture":[250],"Simulation":[255],"results":[256],"show":[257],"that":[258],"CWLP":[259],"provides":[260],"speedup":[262],"up":[263],"19.8%":[265],"average":[268],"improvement":[269],"8.8%":[271],"over":[272],"baseline":[274],"methods.":[275]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
