{"id":"https://openalex.org/W4415250851","doi":"https://doi.org/10.1109/hpec67600.2025.11196239","title":"GCN-Driven CUDA Parameter Optimization for Parallel Triangle Counting in Graphs","display_name":"GCN-Driven CUDA Parameter Optimization for Parallel Triangle Counting in Graphs","publication_year":2025,"publication_date":"2025-09-15","ids":{"openalex":"https://openalex.org/W4415250851","doi":"https://doi.org/10.1109/hpec67600.2025.11196239"},"language":"en","primary_location":{"id":"doi:10.1109/hpec67600.2025.11196239","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec67600.2025.11196239","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5106055615","display_name":"Hasan Serdar Arikan","orcid":null},"institutions":[{"id":"https://openalex.org/I133999245","display_name":"University of Nevada, Las Vegas","ror":"https://ror.org/0406gha72","country_code":"US","type":"education","lineage":["https://openalex.org/I133999245"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Hasan Serdar Arikan","raw_affiliation_strings":["University of Nevada, Las Vegas,Las Vegas,NV,USA"],"affiliations":[{"raw_affiliation_string":"University of Nevada, Las Vegas,Las Vegas,NV,USA","institution_ids":["https://openalex.org/I133999245"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051223866","display_name":"Rakibul Hassan","orcid":"https://orcid.org/0000-0002-9516-8361"},"institutions":[{"id":"https://openalex.org/I133999245","display_name":"University of Nevada, Las Vegas","ror":"https://ror.org/0406gha72","country_code":"US","type":"education","lineage":["https://openalex.org/I133999245"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rakibul Hassan","raw_affiliation_strings":["University of Nevada, Las Vegas,Las Vegas,NV,USA"],"affiliations":[{"raw_affiliation_string":"University of Nevada, Las Vegas,Las Vegas,NV,USA","institution_ids":["https://openalex.org/I133999245"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Shubhashish Kar","orcid":null},"institutions":[{"id":"https://openalex.org/I133999245","display_name":"University of Nevada, Las Vegas","ror":"https://ror.org/0406gha72","country_code":"US","type":"education","lineage":["https://openalex.org/I133999245"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shubhashish Kar","raw_affiliation_strings":["University of Nevada, Las Vegas,Las Vegas,NV,USA"],"affiliations":[{"raw_affiliation_string":"University of Nevada, Las Vegas,Las Vegas,NV,USA","institution_ids":["https://openalex.org/I133999245"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019066795","display_name":"Doru Thom Popovici","orcid":"https://orcid.org/0000-0002-7271-8092"},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Doru Thom Popovici","raw_affiliation_strings":["Lawrence Berkeley National Laboratory,Berkeley,CA,USA"],"affiliations":[{"raw_affiliation_string":"Lawrence Berkeley National Laboratory,Berkeley,CA,USA","institution_ids":["https://openalex.org/I148283060"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033369839","display_name":"Shaikh Arifuzzaman","orcid":"https://orcid.org/0000-0002-6893-2475"},"institutions":[{"id":"https://openalex.org/I133999245","display_name":"University of Nevada, Las Vegas","ror":"https://ror.org/0406gha72","country_code":"US","type":"education","lineage":["https://openalex.org/I133999245"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shaikh Arifuzzaman","raw_affiliation_strings":["University of Nevada, Las Vegas,Las Vegas,NV,USA"],"affiliations":[{"raw_affiliation_string":"University of Nevada, Las Vegas,Las Vegas,NV,USA","institution_ids":["https://openalex.org/I133999245"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5106055615"],"corresponding_institution_ids":["https://openalex.org/I133999245"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.27477819,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9889000058174133,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9889000058174133,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9120000004768372,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.7063000202178955},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.593500018119812},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.45089998841285706},{"id":"https://openalex.org/keywords/workload","display_name":"Workload","score":0.36090001463890076},{"id":"https://openalex.org/keywords/block-size","display_name":"Block size","score":0.3490999937057495},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.34040001034736633},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.33399999141693115},{"id":"https://openalex.org/keywords/graph-partition","display_name":"Graph partition","score":0.3334999978542328}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7354000210762024},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.7063000202178955},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.593500018119812},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.49459999799728394},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.45089998841285706},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.42890000343322754},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4253000020980835},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.36090001463890076},{"id":"https://openalex.org/C41431624","wikidata":"https://www.wikidata.org/wiki/Q1053357","display_name":"Block size","level":3,"score":0.3490999937057495},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.34040001034736633},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.33399999141693115},{"id":"https://openalex.org/C48903430","wikidata":"https://www.wikidata.org/wiki/Q491370","display_name":"Graph partition","level":3,"score":0.3334999978542328},{"id":"https://openalex.org/C19332903","wikidata":"https://www.wikidata.org/wiki/Q7623247","display_name":"Strength of a graph","level":5,"score":0.3188999891281128},{"id":"https://openalex.org/C134727501","wikidata":"https://www.wikidata.org/wiki/Q5597073","display_name":"Graph bandwidth","level":5,"score":0.3111000061035156},{"id":"https://openalex.org/C62611344","wikidata":"https://www.wikidata.org/wiki/Q1062658","display_name":"Node (physics)","level":2,"score":0.3061999976634979},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.28299999237060547},{"id":"https://openalex.org/C2986651925","wikidata":"https://www.wikidata.org/wiki/Q1514868","display_name":"Graph algorithms","level":3,"score":0.2728999853134155},{"id":"https://openalex.org/C88230418","wikidata":"https://www.wikidata.org/wiki/Q131476","display_name":"Graph theory","level":2,"score":0.27250000834465027},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.2671000063419342},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.2538999915122986}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec67600.2025.11196239","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec67600.2025.11196239","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W1504291959","https://openalex.org/W2083005303","https://openalex.org/W2132022337","https://openalex.org/W2559876875","https://openalex.org/W2786865931","https://openalex.org/W2809418595","https://openalex.org/W2985331920","https://openalex.org/W3004718539","https://openalex.org/W3097534150","https://openalex.org/W4285413018","https://openalex.org/W4400410928","https://openalex.org/W4401017800"],"related_works":[],"abstract_inverted_index":{"Determining":[0],"optimal":[1,80,154,170,184],"CUDA":[2,91,155],"block":[3,15,81,156,171,196],"size":[4,16,157,178,197],"configurations":[5],"represents":[6],"a":[7,71,176],"critical":[8],"challenge":[9],"in":[10,232],"GPU-based":[11],"graph":[12,60,67,92,97,117,208,213,233],"processing.":[13],"The":[14],"directly":[17],"impacts":[18],"execution":[19,120],"efficiency":[20],"by":[21],"balancing":[22],"kernel":[23],"launch":[24],"overhead":[25],"and":[26,119,148,215,226],"GPU":[27],"occupancy\u2014with":[28],"our":[29,137,190],"experiments":[30],"showing":[31],"performance":[32,204],"variations":[33],"of":[34,59,161],"up":[35,200],"to":[36,52,167,182,201],"24x.":[37],"Existing":[38],"approaches":[39],"rely":[40],"on":[41,66,123],"architectural":[42],"constraints":[43],"such":[44],"as":[45,100,109],"static":[46],"API-based":[47],"prediction":[48],"methods":[49],"that":[50,78,179,189],"fail":[51],"account":[53],"for":[54,90,229],"the":[55,96,110,114,164,169,183],"dynamic":[56],"memory":[57,145],"requirements":[58],"algorithms,":[61],"which":[62],"vary":[63],"significantly":[64,193],"based":[65],"topology.":[68],"We":[69],"propose":[70],"novel":[72],"Graph":[73],"Convolutional":[74],"Network":[75],"(GCN)":[76],"approach":[77],"predicts":[79,153],"sizes":[82],"(64,":[83],"128,":[84],"256,":[85],"512,":[86],"or":[87],"1024":[88],"threads)":[89],"algorithms":[93],"using":[94],"only":[95],"edge":[98],"list":[99],"input.":[101],"Our":[102,151],"three-layer":[103],"GCN":[104],"model":[105,138,152,165,192],"uses":[106],"node":[107],"degree":[108],"primary":[111],"feature,":[112],"capturing":[113],"relationship":[115],"between":[116],"structure":[118],"performance.":[121],"Trained":[122],"over":[124],"100":[125],"diverse":[126,207],"networks":[127],"from":[128],"multiple":[129],"domains":[130],"(social,":[131],"biological,":[132],"web,":[133],"road":[134],"networks,":[135],"etc.),":[136],"effectively":[139],"learns":[140],"how":[141],"topological":[142],"characteristics":[143],"influence":[144],"access":[146],"patterns":[147],"workload":[149],"distribution.":[150],"with":[158],"an":[159],"accuracy":[160],"63%.":[162],"When":[163],"fails":[166],"predict":[168],"size,":[172],"it":[173],"typically":[174],"selects":[175],"suboptimal":[177],"is":[180],"adjacent":[181],"configuration.":[185],"Experimental":[186],"results":[187],"demonstrate":[188],"GCN-based":[191],"outperforms":[194],"fixed":[195],"strategies,":[198],"achieving":[199],"5.3\u00d7":[202],"mean":[203],"improvement":[205],"across":[206],"types.":[209],"This":[210],"work":[211],"bridges":[212],"learning":[214],"high-performance":[216],"computing,":[217],"enabling":[218],"automatic":[219],"parameter":[220],"tuning":[221],"without":[222],"exhaustive":[223],"empirical":[224],"search,":[225],"opens":[227],"avenues":[228],"further":[230],"optimization":[231],"algorithm":[234],"acceleration.":[235]},"counts_by_year":[],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-16T00:00:00"}
