{"id":"https://openalex.org/W4415250764","doi":"https://doi.org/10.1109/hpec67600.2025.11196089","title":"BUG: Balanced DFS-Based Subgraph Matching with a ReUse Strategy on GPUs","display_name":"BUG: Balanced DFS-Based Subgraph Matching with a ReUse Strategy on GPUs","publication_year":2025,"publication_date":"2025-09-15","ids":{"openalex":"https://openalex.org/W4415250764","doi":"https://doi.org/10.1109/hpec67600.2025.11196089"},"language":"en","primary_location":{"id":"doi:10.1109/hpec67600.2025.11196089","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec67600.2025.11196089","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101727495","display_name":"Zicang Xu","orcid":"https://orcid.org/0009-0003-3294-5842"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zicang Xu","raw_affiliation_strings":["Peking University,Wangxuan Institute of Computer Technology,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Peking University,Wangxuan Institute of Computer Technology,Beijing,China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033785339","display_name":"Lei Zou","orcid":"https://orcid.org/0000-0002-8586-4400"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lei Zou","raw_affiliation_strings":["Peking University,Wangxuan Institute of Computer Technology,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Peking University,Wangxuan Institute of Computer Technology,Beijing,China","institution_ids":["https://openalex.org/I20231570"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5101727495"],"corresponding_institution_ids":["https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.33921173,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12326","display_name":"Network Packet Processing and Optimization","score":0.9858999848365784,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12326","display_name":"Network Packet Processing and Optimization","score":0.9858999848365784,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9745000004768372,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11072","display_name":"Peroxisome Proliferator-Activated Receptors","score":0.951200008392334,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.7574999928474426},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.6984000205993652},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5817999839782715},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5339000225067139},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5321999788284302},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.49470001459121704},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4796000123023987},{"id":"https://openalex.org/keywords/workload","display_name":"Workload","score":0.4449999928474426},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4300000071525574}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8331999778747559},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.7574999928474426},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.6984000205993652},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5817999839782715},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5503000020980835},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5339000225067139},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5321999788284302},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.49470001459121704},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4796000123023987},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.4449999928474426},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.4300999939441681},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4300000071525574},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.41769999265670776},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.3869999945163727},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.3700999915599823},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.34209999442100525},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.3160000145435333},{"id":"https://openalex.org/C2989134064","wikidata":"https://www.wikidata.org/wiki/Q288510","display_name":"Execution time","level":2,"score":0.30379998683929443},{"id":"https://openalex.org/C138959212","wikidata":"https://www.wikidata.org/wiki/Q1806783","display_name":"Load balancing (electrical power)","level":3,"score":0.30230000615119934},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.29980000853538513},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.2791999876499176},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.2791000008583069},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.27630001306533813},{"id":"https://openalex.org/C151376022","wikidata":"https://www.wikidata.org/wiki/Q168698","display_name":"Exponential function","level":2,"score":0.2696000039577484},{"id":"https://openalex.org/C2781357197","wikidata":"https://www.wikidata.org/wiki/Q5757597","display_name":"High memory","level":2,"score":0.26759999990463257},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.2648000121116638},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.25859999656677246},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.25450000166893005}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec67600.2025.11196089","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec67600.2025.11196089","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W993733874","https://openalex.org/W2035173902","https://openalex.org/W2132285256","https://openalex.org/W2423652555","https://openalex.org/W2469279958","https://openalex.org/W2608355501","https://openalex.org/W2795196889","https://openalex.org/W2948167003","https://openalex.org/W2948742909","https://openalex.org/W3097653461","https://openalex.org/W3133642374","https://openalex.org/W3137705542","https://openalex.org/W3209182454","https://openalex.org/W4285609551","https://openalex.org/W4321636650","https://openalex.org/W4386709687","https://openalex.org/W4390188682","https://openalex.org/W4400909741"],"related_works":[],"abstract_inverted_index":{"Subgraph":[0],"matching":[1,36],"is":[2],"a":[3,109,119,130,135,161],"fundamental":[4],"problem":[5],"in":[6,12],"graph":[7],"analysis":[8],"with":[9,87],"wide-ranging":[10],"applications":[11],"domains":[13],"such":[14],"as":[15],"bioinformatics,":[16],"fraud":[17],"detection,":[18],"social":[19],"networks,":[20],"and":[21,48,91,134,151],"recommendation":[22],"systems.":[23],"Despite":[24],"extensive":[25],"optimization":[26],"efforts":[27],"on":[28,41,149],"CPU":[29],"platforms,":[30],"the":[31,38,45,74,144],"NP-hard":[32],"nature":[33],"of":[34,52,77,132],"subgraph":[35],"limits":[37],"performance,":[39],"especially":[40],"large-scale":[42],"datasets.":[43],"Leveraging":[44],"massive":[46],"parallelism":[47],"high":[49],"memory":[50,70],"bandwidth":[51],"GPUs":[53],"offers":[54],"significant":[55],"acceleration":[56],"potential.":[57],"However,":[58],"existing":[59],"GPU-based":[60],"solutions":[61],"face":[62],"key":[63],"challenges.":[64],"BFS-based":[65],"approaches":[66],"suffer":[67],"from":[68],"excessive":[69],"consumption":[71],"due":[72],"to":[73,102,138,160,165],"exponential":[75],"growth":[76],"partial":[78],"matches,":[79],"while":[80],"more":[81],"DFS-based":[82],"methods,":[83],"though":[84],"memory-efficient,":[85],"struggle":[86],"severe":[88],"load":[89],"imbalance":[90],"inefficient":[92],"computation":[93],"reuse.In":[94],"this":[95],"work,":[96],"we":[97,107,128],"propose":[98],"two":[99],"complementary":[100],"strategies":[101],"address":[103],"these":[104],"issues.":[105],"First,":[106],"introduce":[108],"work-offload":[110],"mechanism":[111],"that":[112,155],"dynamically":[113],"balances":[114],"workload":[115],"across":[116],"warps":[117],"using":[118],"global":[120],"task":[121],"queue,":[122],"significantly":[123],"improving":[124],"resource":[125],"utilization.":[126],"Second,":[127],"employ":[129],"combination":[131],"symmetry-breaking":[133],"reuse":[136],"strategy":[137],"reduce":[139],"redundant":[140],"set":[141],"intersections":[142],"during":[143],"enumeration":[145],"process.":[146],"The":[147],"experiments":[148],"real-world":[150],"synthetic":[152],"datasets":[153],"demonstrate":[154],"our":[156],"approach":[157],"achieves":[158],"up":[159],"25.59\u00d7":[162],"speedup":[163],"compared":[164],"prior":[166],"works.":[167]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-16T00:00:00"}
