{"id":"https://openalex.org/W4417403795","doi":"https://doi.org/10.1109/pact65351.2025.00023","title":"ANG: Accelerating NFA processing on GPUs via Exploring Multi-Level Fine-Grained Parallelism","display_name":"ANG: Accelerating NFA processing on GPUs via Exploring Multi-Level Fine-Grained Parallelism","publication_year":2025,"publication_date":"2025-11-03","ids":{"openalex":"https://openalex.org/W4417403795","doi":"https://doi.org/10.1109/pact65351.2025.00023"},"language":null,"primary_location":{"id":"doi:10.1109/pact65351.2025.00023","is_oa":false,"landing_page_url":"https://doi.org/10.1109/pact65351.2025.00023","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 34th International Conference on Parallel Architectures and Compilation Techniques (PACT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5070236689","display_name":"Yu Guang Wang","orcid":"https://orcid.org/0000-0002-7450-0273"},"institutions":[{"id":"https://openalex.org/I11957088","display_name":"Michigan Technological University","ror":"https://ror.org/0036rpn28","country_code":"US","type":"education","lineage":["https://openalex.org/I11957088"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yuguang Wang","raw_affiliation_strings":["Michigan Technological University"],"affiliations":[{"raw_affiliation_string":"Michigan Technological University","institution_ids":["https://openalex.org/I11957088"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079982993","display_name":"Yunmo Zhang","orcid":"https://orcid.org/0000-0002-7462-2780"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Yunmo Zhang","raw_affiliation_strings":["City University of Hong Kong"],"affiliations":[{"raw_affiliation_string":"City University of Hong Kong","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031586079","display_name":"Ziran Liu","orcid":"https://orcid.org/0000-0002-8980-7374"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Zeyu Liu","raw_affiliation_strings":["City University of Hong Kong"],"affiliations":[{"raw_affiliation_string":"City University of Hong Kong","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013012522","display_name":"Junqiao Qiu","orcid":"https://orcid.org/0000-0001-7776-3944"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Junqiao Qiu","raw_affiliation_strings":["City University of Hong Kong"],"affiliations":[{"raw_affiliation_string":"City University of Hong Kong","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101925944","display_name":"Zhenlin Wang","orcid":"https://orcid.org/0000-0002-0429-4371"},"institutions":[{"id":"https://openalex.org/I11957088","display_name":"Michigan Technological University","ror":"https://ror.org/0036rpn28","country_code":"US","type":"education","lineage":["https://openalex.org/I11957088"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhenlin Wang","raw_affiliation_strings":["Michigan Technological University"],"affiliations":[{"raw_affiliation_string":"Michigan Technological University","institution_ids":["https://openalex.org/I11957088"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5070236689"],"corresponding_institution_ids":["https://openalex.org/I11957088"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.43490391,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"135","last_page":"147"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12326","display_name":"Network Packet Processing and Optimization","score":0.5174999833106995,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12326","display_name":"Network Packet Processing and Optimization","score":0.5174999833106995,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.133200004696846,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11567","display_name":"semigroups and automata theory","score":0.020999999716877937,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/task-parallelism","display_name":"Task parallelism","score":0.611299991607666},{"id":"https://openalex.org/keywords/data-parallelism","display_name":"Data parallelism","score":0.5685999989509583},{"id":"https://openalex.org/keywords/instruction-level-parallelism","display_name":"Instruction-level parallelism","score":0.5537999868392944},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.5514000058174133},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.4830000102519989},{"id":"https://openalex.org/keywords/implicit-parallelism","display_name":"Implicit parallelism","score":0.460099995136261},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.45829999446868896},{"id":"https://openalex.org/keywords/automaton","display_name":"Automaton","score":0.44200000166893005},{"id":"https://openalex.org/keywords/parallel-processing","display_name":"Parallel processing","score":0.420199990272522}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8565999865531921},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7580999732017517},{"id":"https://openalex.org/C42992933","wikidata":"https://www.wikidata.org/wiki/Q691169","display_name":"Task parallelism","level":3,"score":0.611299991607666},{"id":"https://openalex.org/C61483411","wikidata":"https://www.wikidata.org/wiki/Q3124522","display_name":"Data parallelism","level":3,"score":0.5685999989509583},{"id":"https://openalex.org/C140763907","wikidata":"https://www.wikidata.org/wiki/Q2714055","display_name":"Instruction-level parallelism","level":3,"score":0.5537999868392944},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.5514000058174133},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.4830000102519989},{"id":"https://openalex.org/C3543717","wikidata":"https://www.wikidata.org/wiki/Q6007302","display_name":"Implicit parallelism","level":4,"score":0.460099995136261},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.45829999446868896},{"id":"https://openalex.org/C112505250","wikidata":"https://www.wikidata.org/wiki/Q787116","display_name":"Automaton","level":2,"score":0.44200000166893005},{"id":"https://openalex.org/C106515295","wikidata":"https://www.wikidata.org/wiki/Q26806595","display_name":"Parallel processing","level":2,"score":0.420199990272522},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.4113999903202057},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.40299999713897705},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4016000032424927},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.3849000036716461},{"id":"https://openalex.org/C15296174","wikidata":"https://www.wikidata.org/wiki/Q7575343","display_name":"Speculative multithreading","level":4,"score":0.3605000078678131},{"id":"https://openalex.org/C164833996","wikidata":"https://www.wikidata.org/wiki/Q2323839","display_name":"Automatic parallelization","level":3,"score":0.33959999680519104},{"id":"https://openalex.org/C138827492","wikidata":"https://www.wikidata.org/wiki/Q6661985","display_name":"Data processing","level":2,"score":0.29120001196861267},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.2904999852180481},{"id":"https://openalex.org/C2776834041","wikidata":"https://www.wikidata.org/wiki/Q25346349","display_name":"Execution model","level":2,"score":0.28459998965263367},{"id":"https://openalex.org/C167822520","wikidata":"https://www.wikidata.org/wiki/Q176452","display_name":"Finite-state machine","level":2,"score":0.27810001373291016},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.272599995136261},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.27149999141693115},{"id":"https://openalex.org/C2781357197","wikidata":"https://www.wikidata.org/wiki/Q5757597","display_name":"High memory","level":2,"score":0.258899986743927},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.25589999556541443},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2556999921798706},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.25380000472068787}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/pact65351.2025.00023","is_oa":false,"landing_page_url":"https://doi.org/10.1109/pact65351.2025.00023","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 34th International Conference on Parallel Architectures and Compilation Techniques (PACT)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320322170","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":44,"referenced_works":["https://openalex.org/W1980938256","https://openalex.org/W2030377171","https://openalex.org/W2040976214","https://openalex.org/W2058950969","https://openalex.org/W2062949766","https://openalex.org/W2077450865","https://openalex.org/W2087634916","https://openalex.org/W2123730957","https://openalex.org/W2135039871","https://openalex.org/W2149225459","https://openalex.org/W2191468669","https://openalex.org/W2478089729","https://openalex.org/W2507706987","https://openalex.org/W2580537323","https://openalex.org/W2584402701","https://openalex.org/W2617288171","https://openalex.org/W2618550415","https://openalex.org/W2619891821","https://openalex.org/W2772969855","https://openalex.org/W2808825929","https://openalex.org/W2903717232","https://openalex.org/W2904948956","https://openalex.org/W2994400714","https://openalex.org/W3000718375","https://openalex.org/W3010779417","https://openalex.org/W3011836658","https://openalex.org/W3034732732","https://openalex.org/W3109227260","https://openalex.org/W3152803275","https://openalex.org/W3207636857","https://openalex.org/W4200455997","https://openalex.org/W4253103663","https://openalex.org/W4281667938","https://openalex.org/W4285503953","https://openalex.org/W4319870577","https://openalex.org/W4322764349","https://openalex.org/W4362677176","https://openalex.org/W4366825874","https://openalex.org/W4385080045","https://openalex.org/W4386307210","https://openalex.org/W4392265900","https://openalex.org/W4394892700","https://openalex.org/W4396214339","https://openalex.org/W4408886107"],"related_works":[],"abstract_inverted_index":{"Finite":[0,43],"Automata":[1,44],"(FA)":[2],"processing":[3,24,61,165],"is":[4,76],"a":[5,64,145,162,181],"core":[6],"computation":[7],"in":[8,59,78,120,155,214],"various":[9,92],"real-world":[10],"applications.":[11],"Over":[12],"the":[13,96,111,125,176],"past":[14],"decades,":[15],"extensive":[16],"efforts":[17],"have":[18,47],"been":[19],"dedicated":[20],"to":[21,32,199,211],"accelerating":[22],"FA":[23],"on":[25,148,172,180,207],"modern":[26],"parallel":[27],"platforms,":[28],"particularly":[29],"GPUs,":[30],"due":[31],"their":[33],"high":[34],"memory":[35],"bandwidth":[36],"and":[37,49,94],"massive":[38],"hardware":[39],"parallelism.":[40,150],"As":[41],"Non-deterministic":[42],"(NFA)-based":[45],"applications":[46],"strong":[48],"growing":[50],"demands":[51],"for":[52,133],"real-time":[53],"data":[54],"analytics":[55],"nowadays,":[56],"reducing":[57],"latency":[58],"automata":[60],"has":[62],"become":[63],"critical":[65],"priority.":[66],"However,":[67],"existing":[68],"approaches":[69],"face":[70],"significant":[71,195],"challenges":[72,171],"when":[73],"limited":[74],"parallelism":[75,90,119,141],"exposed":[77],"NFA":[79,100,106,135,164],"computations.":[80],"In":[81],"this":[82,127],"work,":[83],"we":[84,109,157],"explore":[85],"opportunities":[86],"of":[87,98,178,183],"introducing":[88],"fine-grained":[89,140],"from":[91],"sources":[93],"addressing":[95],"limitations":[97],"fast":[99],"processing.":[101],"Specifically,":[102],"by":[103,116],"analyzing":[104],"different":[105],"parallelization":[107,131,154],"schemes,":[108],"identify":[110],"major":[112],"performance":[113,196],"issue":[114],"caused":[115],"insufficient":[117],"state-level":[118],"conventional":[121],"designs.":[122],"To":[123,151],"overcome":[124],"bottleneck,":[126],"work":[128],"introduces":[129],"speculative":[130,153],"tailored":[132],"GPU-based":[134],"processing,":[136],"thus":[137],"effectively":[138],"exploiting":[139],"across":[142],"multilevels,":[143],"with":[144,186,202],"particular":[146],"focus":[147],"input-chunk-level":[149],"realize":[152],"practice,":[156],"develop":[158],"$A":[159],"N":[160],"G$,":[161],"latency-oriented":[163],"framework":[166],"that":[167,192],"overcomes":[168],"key":[169],"implementation":[170],"GPUs.":[173],"We":[174],"evaluate":[175],"efficiency":[177],"ANG":[179,193],"set":[182],"representative":[184],"NFAs":[185],"diverse":[187],"properties.":[188],"Experimental":[189],"results":[190],"demonstrate":[191],"achieves":[194],"improvement":[197],"compared":[198],"state-of-theart":[200],"techniques,":[201],"reaching":[203],"$11.74":[204],"\\times$":[205,213],"speedup":[206],"average":[208],"(and":[209],"up":[210],"$49.88":[212],"extreme":[215],"cases).":[216]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-12-16T00:00:00"}
