{"id":"https://openalex.org/W4411486507","doi":"https://doi.org/10.1145/3695053.3731068","title":"GCStack+GCScaler: Fast and Accurate GPU Performance Analyses Using Fine-Grained Stall Cycle Accounting and Interval Analysis","display_name":"GCStack+GCScaler: Fast and Accurate GPU Performance Analyses Using Fine-Grained Stall Cycle Accounting and Interval Analysis","publication_year":2025,"publication_date":"2025-06-20","ids":{"openalex":"https://openalex.org/W4411486507","doi":"https://doi.org/10.1145/3695053.3731068"},"language":"en","primary_location":{"id":"doi:10.1145/3695053.3731068","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3695053.3731068","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3695053.3731068","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 52nd Annual International Symposium on Computer Architecture","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3695053.3731068","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5011903464","display_name":"Hanna Cha","orcid":"https://orcid.org/0009-0003-5937-8550"},"institutions":[{"id":"https://openalex.org/I193775966","display_name":"Yonsei University","ror":"https://ror.org/01wjejq96","country_code":"KR","type":"education","lineage":["https://openalex.org/I193775966"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Hanna Cha","raw_affiliation_strings":["Yonsei University, Seoul, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Yonsei University, Seoul, Republic of Korea","institution_ids":["https://openalex.org/I193775966"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064293175","display_name":"Sungchul Lee","orcid":"https://orcid.org/0009-0009-5549-7265"},"institutions":[{"id":"https://openalex.org/I193775966","display_name":"Yonsei University","ror":"https://ror.org/01wjejq96","country_code":"KR","type":"education","lineage":["https://openalex.org/I193775966"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Sungchul Lee","raw_affiliation_strings":["Yonsei University, Seoul, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Yonsei University, Seoul, Republic of Korea","institution_ids":["https://openalex.org/I193775966"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052472647","display_name":"Jounghoo Lee","orcid":"https://orcid.org/0000-0002-0463-7717"},"institutions":[{"id":"https://openalex.org/I193775966","display_name":"Yonsei University","ror":"https://ror.org/01wjejq96","country_code":"KR","type":"education","lineage":["https://openalex.org/I193775966"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jounghoo Lee","raw_affiliation_strings":["Yonsei University, Seoul, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Yonsei University, Seoul, Republic of Korea","institution_ids":["https://openalex.org/I193775966"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026190390","display_name":"Yeonan Ha","orcid":"https://orcid.org/0000-0002-3418-5299"},"institutions":[{"id":"https://openalex.org/I193775966","display_name":"Yonsei University","ror":"https://ror.org/01wjejq96","country_code":"KR","type":"education","lineage":["https://openalex.org/I193775966"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Yeonan Ha","raw_affiliation_strings":["Yonsei University, Seoul, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Yonsei University, Seoul, Republic of Korea","institution_ids":["https://openalex.org/I193775966"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078370106","display_name":"Joonsung Kim","orcid":"https://orcid.org/0000-0002-5432-7813"},"institutions":[{"id":"https://openalex.org/I848706","display_name":"Sungkyunkwan University","ror":"https://ror.org/04q78tk20","country_code":"KR","type":"education","lineage":["https://openalex.org/I848706"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Joonsung Kim","raw_affiliation_strings":["Sungkyunkwan University, Suwon, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Sungkyunkwan University, Suwon, Republic of Korea","institution_ids":["https://openalex.org/I848706"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088551237","display_name":"Youngsok Kim","orcid":"https://orcid.org/0000-0002-1015-9969"},"institutions":[{"id":"https://openalex.org/I193775966","display_name":"Yonsei University","ror":"https://ror.org/01wjejq96","country_code":"KR","type":"education","lineage":["https://openalex.org/I193775966"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Youngsok Kim","raw_affiliation_strings":["Yonsei University, Seoul, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Yonsei University, Seoul, Republic of Korea","institution_ids":["https://openalex.org/I193775966"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5011903464"],"corresponding_institution_ids":["https://openalex.org/I193775966"],"apc_list":null,"apc_paid":null,"fwci":2.4104,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.86587938,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1509","last_page":"1523"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5815133452415466},{"id":"https://openalex.org/keywords/stall","display_name":"Stall (fluid mechanics)","score":0.5327339768409729},{"id":"https://openalex.org/keywords/interval","display_name":"Interval (graph theory)","score":0.5070066452026367},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.371221661567688},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.12494289875030518},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.0652419924736023},{"id":"https://openalex.org/keywords/mechanics","display_name":"Mechanics","score":0.06129518151283264}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5815133452415466},{"id":"https://openalex.org/C5804382","wikidata":"https://www.wikidata.org/wiki/Q752034","display_name":"Stall (fluid mechanics)","level":2,"score":0.5327339768409729},{"id":"https://openalex.org/C2778067643","wikidata":"https://www.wikidata.org/wiki/Q166507","display_name":"Interval (graph theory)","level":2,"score":0.5070066452026367},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.371221661567688},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.12494289875030518},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0652419924736023},{"id":"https://openalex.org/C57879066","wikidata":"https://www.wikidata.org/wiki/Q41217","display_name":"Mechanics","level":1,"score":0.06129518151283264},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3695053.3731068","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3695053.3731068","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3695053.3731068","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 52nd Annual International Symposium on Computer Architecture","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3695053.3731068","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3695053.3731068","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3695053.3731068","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 52nd Annual International Symposium on Computer Architecture","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1806439380","display_name":null,"funder_award_id":"Korea government (MSIP)","funder_id":"https://openalex.org/F4320335489","funder_display_name":"Institute for Information and Communications Technology Promotion"},{"id":"https://openalex.org/G2257911995","display_name":null,"funder_award_id":"RS-2020-II201361","funder_id":"https://openalex.org/F4320328359","funder_display_name":"Ministry of Science and ICT, South Korea"},{"id":"https://openalex.org/G3034753964","display_name":null,"funder_award_id":"grant","funder_id":"https://openalex.org/F4320320671","funder_display_name":"National Research Foundation"},{"id":"https://openalex.org/G342704958","display_name":null,"funder_award_id":"funded","funder_id":"https://openalex.org/F4320322120","funder_display_name":"National Research Foundation of Korea"},{"id":"https://openalex.org/G6072120315","display_name":null,"funder_award_id":"funded","funder_id":"https://openalex.org/F4320335489","funder_display_name":"Institute for Information and Communications Technology Promotion"},{"id":"https://openalex.org/G6281998519","display_name":null,"funder_award_id":"RS-2024-00395134","funder_id":"https://openalex.org/F4320335489","funder_display_name":"Institute for Information and Communications Technology Promotion"},{"id":"https://openalex.org/G7822388480","display_name":null,"funder_award_id":"RS-2024-00395134","funder_id":"https://openalex.org/F4320328359","funder_display_name":"Ministry of Science and ICT, South Korea"},{"id":"https://openalex.org/G8899733881","display_name":null,"funder_award_id":"RS-2020-II201361","funder_id":"https://openalex.org/F4320322120","funder_display_name":"National Research Foundation of Korea"}],"funders":[{"id":"https://openalex.org/F4320320671","display_name":"National Research Foundation","ror":"https://ror.org/05s0g1g46"},{"id":"https://openalex.org/F4320322030","display_name":"Ministry of Science, ICT and Future Planning","ror":"https://ror.org/032e49973"},{"id":"https://openalex.org/F4320322120","display_name":"National Research Foundation of Korea","ror":"https://ror.org/013aysd81"},{"id":"https://openalex.org/F4320328359","display_name":"Ministry of Science and ICT, South Korea","ror":"https://ror.org/01wpjm123"},{"id":"https://openalex.org/F4320335489","display_name":"Institute for Information and Communications Technology Promotion","ror":"https://ror.org/01g0hqq23"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4411486507.pdf","grobid_xml":"https://content.openalex.org/works/W4411486507.grobid-xml"},"referenced_works_count":52,"referenced_works":["https://openalex.org/W1902930330","https://openalex.org/W1979527452","https://openalex.org/W1984222112","https://openalex.org/W1985807728","https://openalex.org/W1991056822","https://openalex.org/W2000335122","https://openalex.org/W2021211271","https://openalex.org/W2038666141","https://openalex.org/W2045275492","https://openalex.org/W2080592089","https://openalex.org/W2086739451","https://openalex.org/W2112029501","https://openalex.org/W2123990187","https://openalex.org/W2128120785","https://openalex.org/W2134101883","https://openalex.org/W2144264070","https://openalex.org/W2152513418","https://openalex.org/W2152956697","https://openalex.org/W2234148183","https://openalex.org/W2300338736","https://openalex.org/W2399715892","https://openalex.org/W2417175077","https://openalex.org/W2608190038","https://openalex.org/W2725159389","https://openalex.org/W2736244279","https://openalex.org/W2743508538","https://openalex.org/W2761598581","https://openalex.org/W2794670651","https://openalex.org/W2803405094","https://openalex.org/W2901073342","https://openalex.org/W2921788688","https://openalex.org/W2952928793","https://openalex.org/W2953033921","https://openalex.org/W3027220693","https://openalex.org/W3043571714","https://openalex.org/W3102510044","https://openalex.org/W3104094521","https://openalex.org/W3148013504","https://openalex.org/W3207190098","https://openalex.org/W3207572920","https://openalex.org/W3210604792","https://openalex.org/W3217045543","https://openalex.org/W4206427125","https://openalex.org/W4281689389","https://openalex.org/W4289712286","https://openalex.org/W4296246463","https://openalex.org/W4380874570","https://openalex.org/W4381894551","https://openalex.org/W4389476192","https://openalex.org/W4393592128","https://openalex.org/W4403277057","https://openalex.org/W4404955793"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2335065534","https://openalex.org/W566759681","https://openalex.org/W2112389123","https://openalex.org/W1525610478","https://openalex.org/W2030662055","https://openalex.org/W3004750080","https://openalex.org/W404320512"],"abstract_inverted_index":{"To":[0],"design":[1,23,89],"next-generation":[2],"Graphics":[3],"Processing":[4],"Units":[5],"(GPUs),":[6],"GPU":[7,11,17,22,27,35,42,52,62,88,103,110,124,137,153,189,206,237],"architects":[8,36],"rely":[9],"on":[10],"performance":[12,18,28,43,53,111,125,132,175,251],"analyses":[13,126],"to":[14,37,72,198,214],"identify":[15],"key":[16],"bottlenecks":[19,133],"and":[20,40,66,77,116,122,143,157,172,218,230],"explore":[21],"spaces.Unfortunately,":[24],"the":[25,56,131,147,151,163,174,181,187,200,216],"existing":[26,45,83,188],"analysis":[29,112,156,191],"mechanisms":[30,46,84,119],"make":[31],"it":[32],"difficult":[33],"for":[34,100,203],"conduct":[38],"fast":[39,121],"accurate":[41,123,193],"analyses.The":[44],"can":[47],"provide":[48],"misleading":[49],"insights":[50],"into":[51],"bottlenecks.They":[54],"characterize":[55],"performance-degrading":[57],"stall":[58,75,81,140,148,166,170,194,220],"events":[59,76,167],"of":[60,134,150,235,255],"a":[61,135,177,247],"using":[63,138,154,224],"coarse-grained,":[64],"issue-stage-centric,":[65],"priority-based":[67],"cycle":[68,141,179,183,195,201],"stacks":[69],"which":[70],"tend":[71],"exaggerate":[73],"memory-side":[74],"hide":[78],"concurrently":[79,164],"occurring":[80,165],"events.The":[82],"also":[85],"incur":[86],"high":[87],"space":[90],"exploration":[91,234],"overhead,":[92],"as":[93,176],"they":[94],"involve":[95],"repetitive":[96,242],"cyclelevel":[97],"timing":[98,243],"simulations":[99,244],"evaluating":[101],"alternative":[102,205],"designs.In":[104],"this":[105],"paper,":[106],"we":[107],"propose":[108],"two":[109,118],"mechanisms,":[113],"namely":[114],"GCStack":[115,229],"GCScaler.The":[117],"enable":[120],"by":[127,239],"(1)":[128],"accurately":[129,145],"characterizing":[130],"baseline":[136,152],"fine-grained":[139,178,182],"accounting,":[142],"(2)":[144],"scaling":[146,159,196,211],"cycles":[149,221],"interval":[155,190],"analytical":[158,210],"models.GCStack":[160],"captures":[161],"all":[162],"within":[168],"each":[169],"cycle,":[171],"characterizes":[173],"stack.Using":[180],"stack,":[184],"GCScaler":[185,231],"leverages":[186],"techniques'":[192],"capability":[197],"estimate":[199],"stack":[202],"an":[204,233],"design.GCScaler":[207],"further":[208],"employs":[209],"models":[212],"designed":[213],"scale":[215],"idle":[217],"synchronization":[219],"accurately.Our":[222],"evaluation":[223],"47":[225],"benchmarks":[226],"shows":[227],"that":[228],"accelerate":[232],"1,000":[236],"designs":[238],"32.7":[240],"over":[241],"while":[245],"achieving":[246],"low":[248],"mean":[249],"absolute":[250],"estimation":[252],"error":[253],"rate":[254],"6.37%.":[256]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-14T08:43:22.919905","created_date":"2025-10-10T00:00:00"}
