{"id":"https://openalex.org/W4236446046","doi":"https://doi.org/10.1145/3287318","title":"Quantifying Data Locality in Dynamic Parallelism in GPUs","display_name":"Quantifying Data Locality in Dynamic Parallelism in GPUs","publication_year":2018,"publication_date":"2018-12-21","ids":{"openalex":"https://openalex.org/W4236446046","doi":"https://doi.org/10.1145/3287318"},"language":"en","primary_location":{"id":"doi:10.1145/3287318","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3287318","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3287318","source":{"id":"https://openalex.org/S4210193547","display_name":"Proceedings of the ACM on Measurement and Analysis of Computing Systems","issn_l":"2476-1249","issn":["2476-1249"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Measurement and Analysis of Computing Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3287318","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087859795","display_name":"Xulong Tang","orcid":"https://orcid.org/0000-0002-3385-2053"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xulong Tang","raw_affiliation_strings":["Pennsylvania State University, state college, PA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Pennsylvania State University, state college, PA, USA","institution_ids":["https://openalex.org/I130769515"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112474431","display_name":"Ashutosh Pattnaik","orcid":"https://orcid.org/0000-0003-0367-5989"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ashutosh Pattnaik","raw_affiliation_strings":["Pennsylvania State University, State College, PA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Pennsylvania State University, State College, PA, USA","institution_ids":["https://openalex.org/I130769515"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064126781","display_name":"Onur Kay\u0131ran","orcid":"https://orcid.org/0009-0006-4482-3115"},"institutions":[{"id":"https://openalex.org/I4210137977","display_name":"Advanced Micro Devices (United States)","ror":"https://ror.org/04kd6c783","country_code":"US","type":"company","lineage":["https://openalex.org/I4210137977"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Onur Kayiran","raw_affiliation_strings":["Advanced Micro Devices, Inc., Santa Clara, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Advanced Micro Devices, Inc., Santa Clara, CA, USA","institution_ids":["https://openalex.org/I4210137977"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050350124","display_name":"Adwait Jog","orcid":"https://orcid.org/0000-0002-5525-7204"},"institutions":[{"id":"https://openalex.org/I16285277","display_name":"William & Mary","ror":"https://ror.org/03hsf0573","country_code":"US","type":"education","lineage":["https://openalex.org/I16285277"]},{"id":"https://openalex.org/I267592682","display_name":"Williams (United States)","ror":"https://ror.org/007zhvp17","country_code":"US","type":"company","lineage":["https://openalex.org/I267592682"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Adwait Jog","raw_affiliation_strings":["College of William and Mary, Williamsburg, VA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of William and Mary, Williamsburg, VA, USA","institution_ids":["https://openalex.org/I16285277","https://openalex.org/I267592682"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007116603","display_name":"Mahmut Kandemir","orcid":"https://orcid.org/0000-0002-9940-9951"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mahmut Taylan Kandemir","raw_affiliation_strings":["Pennsylvania State University, State College, PA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Pennsylvania State University, State College, PA, USA","institution_ids":["https://openalex.org/I130769515"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5054027488","display_name":"Chita R. Das","orcid":"https://orcid.org/0000-0002-4746-7578"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chita Das","raw_affiliation_strings":["Pennsylvania State University, State College, PA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Pennsylvania State University, State College, PA, USA","institution_ids":["https://openalex.org/I130769515"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.2375,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.59125519,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"2","issue":"3","first_page":"1","last_page":"24"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.884941577911377},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.7828865647315979},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.6812108755111694},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.6495154500007629},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6145207285881042},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.5274767279624939},{"id":"https://openalex.org/keywords/dynamic-data","display_name":"Dynamic data","score":0.49106645584106445},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.45938780903816223},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.44849586486816406},{"id":"https://openalex.org/keywords/locality-of-reference","display_name":"Locality of reference","score":0.445814847946167},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.432058721780777},{"id":"https://openalex.org/keywords/data-parallelism","display_name":"Data parallelism","score":0.4253832995891571},{"id":"https://openalex.org/keywords/limit","display_name":"Limit (mathematics)","score":0.4207960069179535},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.1869928538799286},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.13153699040412903}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.884941577911377},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.7828865647315979},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.6812108755111694},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.6495154500007629},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6145207285881042},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.5274767279624939},{"id":"https://openalex.org/C197298091","wikidata":"https://www.wikidata.org/wiki/Q5318963","display_name":"Dynamic data","level":2,"score":0.49106645584106445},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.45938780903816223},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.44849586486816406},{"id":"https://openalex.org/C27602214","wikidata":"https://www.wikidata.org/wiki/Q1868547","display_name":"Locality of reference","level":3,"score":0.445814847946167},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.432058721780777},{"id":"https://openalex.org/C61483411","wikidata":"https://www.wikidata.org/wiki/Q3124522","display_name":"Data parallelism","level":3,"score":0.4253832995891571},{"id":"https://openalex.org/C151201525","wikidata":"https://www.wikidata.org/wiki/Q177239","display_name":"Limit (mathematics)","level":2,"score":0.4207960069179535},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.1869928538799286},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.13153699040412903},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3287318","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3287318","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3287318","source":{"id":"https://openalex.org/S4210193547","display_name":"Proceedings of the ACM on Measurement and Analysis of Computing Systems","issn_l":"2476-1249","issn":["2476-1249"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Measurement and Analysis of Computing Systems","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3287318","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3287318","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3287318","source":{"id":"https://openalex.org/S4210193547","display_name":"Proceedings of the ACM on Measurement and Analysis of Computing Systems","issn_l":"2476-1249","issn":["2476-1249"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Measurement and Analysis of Computing Systems","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1644213030","display_name":null,"funder_award_id":"1626251","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G1849369134","display_name":"CSR: Medium: Collaborative Research: Enabling GPUs as First-Class Computing Engines","funder_award_id":"1409095","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G2651608833","display_name":null,"funder_award_id":"1750667","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G2782064734","display_name":null,"funder_award_id":"1526750, 1763681, 1439057, 1439021, 1629129, 1409095, 1626251, 1629915","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3023408722","display_name":null,"funder_award_id":"1763681","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3727847720","display_name":"CRII: SHF: Design and Analysis of Processing-Near-Memory Enabled GPU Architecture","funder_award_id":"1657336","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3926047959","display_name":null,"funder_award_id":"1439057","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4257531402","display_name":null,"funder_award_id":"1629915","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7045959657","display_name":null,"funder_award_id":"1629129","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320307757","display_name":"Advanced Micro Devices","ror":"https://ror.org/04kd6c783"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4236446046.pdf","grobid_xml":"https://content.openalex.org/works/W4236446046.grobid-xml"},"referenced_works_count":49,"referenced_works":["https://openalex.org/W1534435001","https://openalex.org/W1955594754","https://openalex.org/W1973573211","https://openalex.org/W1975274780","https://openalex.org/W1979527452","https://openalex.org/W1982996921","https://openalex.org/W1989061323","https://openalex.org/W2005881934","https://openalex.org/W2009310378","https://openalex.org/W2020572638","https://openalex.org/W2029940394","https://openalex.org/W2047060659","https://openalex.org/W2051688423","https://openalex.org/W2062527253","https://openalex.org/W2067441262","https://openalex.org/W2078994750","https://openalex.org/W2079038734","https://openalex.org/W2084309410","https://openalex.org/W2090278477","https://openalex.org/W2090584832","https://openalex.org/W2096041635","https://openalex.org/W2109432325","https://openalex.org/W2132366470","https://openalex.org/W2139318440","https://openalex.org/W2142444503","https://openalex.org/W2155893237","https://openalex.org/W2156144599","https://openalex.org/W2238700765","https://openalex.org/W2412101011","https://openalex.org/W2513900365","https://openalex.org/W2517869808","https://openalex.org/W2529404329","https://openalex.org/W2538845954","https://openalex.org/W2567006428","https://openalex.org/W2567317362","https://openalex.org/W2605251767","https://openalex.org/W2611998574","https://openalex.org/W2626312854","https://openalex.org/W2766789999","https://openalex.org/W2788386530","https://openalex.org/W2798426781","https://openalex.org/W2899826618","https://openalex.org/W4235508083","https://openalex.org/W4235870392","https://openalex.org/W4241057782","https://openalex.org/W4250054289","https://openalex.org/W4255962840","https://openalex.org/W4256231890","https://openalex.org/W6713134421"],"related_works":["https://openalex.org/W1555349535","https://openalex.org/W2583128298","https://openalex.org/W2053359564","https://openalex.org/W2161159383","https://openalex.org/W1495260638","https://openalex.org/W1511204342","https://openalex.org/W2369125128","https://openalex.org/W2369223577","https://openalex.org/W2010020348","https://openalex.org/W4238138329"],"abstract_inverted_index":{"GPUs":[0,42],"are":[1,10,19,200],"becoming":[2],"prevalent":[3],"in":[4,87,101,119,136],"various":[5],"domains":[6],"of":[7,40,69,108,111,121,133,140,233],"computing":[8],"and":[9,50,80,145,158,165],"widely":[11],"used":[12],"for":[13,150,176],"streaming":[14],"(regular)":[15],"applications.":[16,178],"However,":[17,103],"they":[18],"highly":[20,156],"inefficient":[21],"when":[22],"executing":[23],"irregular":[24,113,157],"applications":[25,114,135],"with":[26,83,202],"unstructured":[27],"inputs":[28],"due":[29],"to":[30,47,61,74,252,257],"load":[31],"imbalance.":[32],"Dynamic":[33],"parallelism":[34,117],"(DP)":[35],"is":[36,73,105,155,159],"a":[37,88,106,185,241],"new":[38,45],"feature":[39],"emerging":[41],"that":[43,192,199,270],"allows":[44],"kernels":[46],"be":[48,194],"generated":[49],"scheduled":[51],"from":[52,229],"the":[53,57,70,76,84,130,163,189,215,221,226,230,245],"device-side":[54],"(GPU)":[55],"without":[56],"host-side":[58],"(CPU)":[59],"intervention":[60],"increase":[62],"parallelism.":[63],"To":[64,179],"efficiently":[65],"support":[66],"DP,":[67],"one":[68],"major":[71],"challenges":[72],"saturate":[75],"GPU":[77],"processing":[78],"elements":[79],"provide":[81],"them":[82],"required":[85],"data":[86,99,122,131,153,173,204,249,259],"timely":[89],"fashion.":[90],"There":[91],"have":[92],"been":[93],"considerable":[94],"efforts":[95],"focusing":[96],"on":[97,162,188,212,225,266,272],"exploiting":[98],"locality":[100,260],"GPUs.":[102],"there":[104],"lack":[107],"quantitative":[109,231],"analysis":[110,232],"how":[112],"using":[115],"dynamic":[116,134],"behave":[118],"terms":[120],"reuse.":[123],"In":[124],"this":[125,180],"paper,":[126],"we":[127,182,237],"quantitatively":[128],"analyze":[129],"reuse":[132,154,174,205,250],"three":[137],"different":[138],"granularities":[139],"schedulable":[141],"units:":[142],"kernel,":[143],"work-group,":[144],"wavefront.":[146],"We":[147],"observe":[148],"that,":[149,211],"DP":[151,177,235],"applications,":[152,236],"heavily":[160],"dependent":[161],"application":[164],"its":[166],"input.":[167],"Thus,":[168],"existing":[169],"techniques":[170],"cannot":[171],"exploit":[172],"effectively":[175],"end,":[181],"first":[183],"conduct":[184],"limit":[186,208],"study":[187,209],"performance":[190,216,277],"improvements":[191],"can":[193,275],"achieved":[195],"by":[196,218,278],"hardware":[197,246],"schedulers":[198,247],"provided":[201],"accurate":[203],"information.":[206],"This":[207],"shows":[210],"an":[213,273],"average,":[214,274],"improves":[217],"19.4%":[219],"over":[220],"baseline":[222],"scheduler.":[223],"Based":[224],"key":[227],"observations":[228],"our":[234],"next":[238],"propose":[239],"LASER,":[240,271],"Locality-Aware":[242],"SchedulER,":[243],"where":[244],"employ":[248],"monitors":[251],"help":[253],"make":[254],"scheduling":[255],"decisions":[256],"improve":[258,276],"at":[261],"runtime.":[262],"Our":[263],"experimental":[264],"results":[265],"16":[267],"benchmarks":[268],"show":[269],"11.3%.":[279]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2018,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
