{"id":"https://openalex.org/W3010779417","doi":"https://doi.org/10.1145/3373376.3378471","title":"Why GPUs are Slow at Executing NFAs and How to Make them Faster","display_name":"Why GPUs are Slow at Executing NFAs and How to Make them Faster","publication_year":2020,"publication_date":"2020-03-09","ids":{"openalex":"https://openalex.org/W3010779417","doi":"https://doi.org/10.1145/3373376.3378471","mag":"3010779417"},"language":"en","primary_location":{"id":"doi:10.1145/3373376.3378471","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3373376.3378471","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3373376.3378471","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Fifth International Conference on Architectural Support for Programming Languages and Operating Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3373376.3378471","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5079275912","display_name":"Hongyuan Liu","orcid":"https://orcid.org/0000-0002-6961-6394"},"institutions":[{"id":"https://openalex.org/I267592682","display_name":"Williams (United States)","ror":"https://ror.org/007zhvp17","country_code":"US","type":"company","lineage":["https://openalex.org/I267592682"]},{"id":"https://openalex.org/I16285277","display_name":"William & Mary","ror":"https://ror.org/03hsf0573","country_code":"US","type":"education","lineage":["https://openalex.org/I16285277"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Hongyuan Liu","raw_affiliation_strings":["College of William &amp; Mary, Williamsburg, VA, USA"],"affiliations":[{"raw_affiliation_string":"College of William &amp; Mary, Williamsburg, VA, USA","institution_ids":["https://openalex.org/I16285277","https://openalex.org/I267592682"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006053754","display_name":"Sreepathi Pai","orcid":"https://orcid.org/0000-0002-3691-7238"},"institutions":[{"id":"https://openalex.org/I5388228","display_name":"University of Rochester","ror":"https://ror.org/022kthw22","country_code":"US","type":"education","lineage":["https://openalex.org/I5388228"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sreepathi Pai","raw_affiliation_strings":["University of Rochester, Rochester, NY, USA"],"affiliations":[{"raw_affiliation_string":"University of Rochester, Rochester, NY, USA","institution_ids":["https://openalex.org/I5388228"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5050350124","display_name":"Adwait Jog","orcid":"https://orcid.org/0000-0002-5525-7204"},"institutions":[{"id":"https://openalex.org/I267592682","display_name":"Williams (United States)","ror":"https://ror.org/007zhvp17","country_code":"US","type":"company","lineage":["https://openalex.org/I267592682"]},{"id":"https://openalex.org/I16285277","display_name":"William & Mary","ror":"https://ror.org/03hsf0573","country_code":"US","type":"education","lineage":["https://openalex.org/I16285277"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Adwait Jog","raw_affiliation_strings":["College of William &amp; Mary, Williamsburg, VA, USA"],"affiliations":[{"raw_affiliation_string":"College of William &amp; Mary, Williamsburg, VA, USA","institution_ids":["https://openalex.org/I16285277","https://openalex.org/I267592682"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5079275912"],"corresponding_institution_ids":["https://openalex.org/I16285277","https://openalex.org/I267592682"],"apc_list":null,"apc_paid":null,"fwci":3.3001,"has_fulltext":true,"cited_by_count":28,"citation_normalized_percentile":{"value":0.9230227,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"251","last_page":"265"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12326","display_name":"Network Packet Processing and Optimization","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12326","display_name":"Network Packet Processing and Optimization","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9890999794006348,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9746000170707703,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8922492265701294},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7729620933532715},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.6937580704689026},{"id":"https://openalex.org/keywords/memory-hierarchy","display_name":"Memory hierarchy","score":0.6394135355949402},{"id":"https://openalex.org/keywords/high-memory","display_name":"High memory","score":0.5322226285934448},{"id":"https://openalex.org/keywords/graphics-processing-unit","display_name":"Graphics processing unit","score":0.5216665267944336},{"id":"https://openalex.org/keywords/finite-state-machine","display_name":"Finite-state machine","score":0.47104883193969727},{"id":"https://openalex.org/keywords/massively-parallel","display_name":"Massively parallel","score":0.45377641916275024},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.18976816534996033},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.12088635563850403}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8922492265701294},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7729620933532715},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.6937580704689026},{"id":"https://openalex.org/C2778100165","wikidata":"https://www.wikidata.org/wiki/Q1589327","display_name":"Memory hierarchy","level":3,"score":0.6394135355949402},{"id":"https://openalex.org/C2781357197","wikidata":"https://www.wikidata.org/wiki/Q5757597","display_name":"High memory","level":2,"score":0.5322226285934448},{"id":"https://openalex.org/C2779851693","wikidata":"https://www.wikidata.org/wiki/Q183484","display_name":"Graphics processing unit","level":2,"score":0.5216665267944336},{"id":"https://openalex.org/C167822520","wikidata":"https://www.wikidata.org/wiki/Q176452","display_name":"Finite-state machine","level":2,"score":0.47104883193969727},{"id":"https://openalex.org/C190475519","wikidata":"https://www.wikidata.org/wiki/Q544384","display_name":"Massively parallel","level":2,"score":0.45377641916275024},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.18976816534996033},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.12088635563850403}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3373376.3378471","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3373376.3378471","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3373376.3378471","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Fifth International Conference on Architectural Support for Programming Languages and Operating Systems","raw_type":"proceedings-article"},{"id":"pmh:oai:repository.hkust.edu.hk:1783.1-121658","is_oa":false,"landing_page_url":"http://gateway.isiknowledge.com/gateway/Gateway.cgi?GWVersion=2&SrcAuth=LinksAMR&SrcApp=PARTNER_APP&DestLinkType=FullRecord&DestApp=WOS&KeyUT=000541369300017","pdf_url":null,"source":{"id":"https://openalex.org/S4306401796","display_name":"Rare & Special e-Zone (The Hong Kong University of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I200769079","host_organization_name":"Hong Kong University of Science and Technology","host_organization_lineage":["https://openalex.org/I200769079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Conference paper"}],"best_oa_location":{"id":"doi:10.1145/3373376.3378471","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3373376.3378471","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3373376.3378471","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Fifth International Conference on Architectural Support for Programming Languages and Operating Systems","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2651608833","display_name":null,"funder_award_id":"1750667","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3727847720","display_name":"CRII: SHF: Design and Analysis of Processing-Near-Memory Enabled GPU Architecture","funder_award_id":"1657336","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3010779417.pdf","grobid_xml":"https://content.openalex.org/works/W3010779417.grobid-xml"},"referenced_works_count":56,"referenced_works":["https://openalex.org/W103634144","https://openalex.org/W1511341314","https://openalex.org/W1552569371","https://openalex.org/W1674877186","https://openalex.org/W1980938256","https://openalex.org/W2030102079","https://openalex.org/W2030377171","https://openalex.org/W2040976214","https://openalex.org/W2044242345","https://openalex.org/W2058950969","https://openalex.org/W2062949766","https://openalex.org/W2077450865","https://openalex.org/W2077934944","https://openalex.org/W2102978741","https://openalex.org/W2107251158","https://openalex.org/W2113422425","https://openalex.org/W2114403135","https://openalex.org/W2132774949","https://openalex.org/W2135039871","https://openalex.org/W2139418336","https://openalex.org/W2141978748","https://openalex.org/W2147025186","https://openalex.org/W2149225459","https://openalex.org/W2162726111","https://openalex.org/W2167971042","https://openalex.org/W2218748714","https://openalex.org/W2236895266","https://openalex.org/W2240191805","https://openalex.org/W2393727648","https://openalex.org/W2434217241","https://openalex.org/W2478089729","https://openalex.org/W2507706987","https://openalex.org/W2523378841","https://openalex.org/W2528800817","https://openalex.org/W2529090470","https://openalex.org/W2530873820","https://openalex.org/W2580537323","https://openalex.org/W2584402701","https://openalex.org/W2617288171","https://openalex.org/W2618550415","https://openalex.org/W2619891821","https://openalex.org/W2728529009","https://openalex.org/W2766073137","https://openalex.org/W2786027348","https://openalex.org/W2794546595","https://openalex.org/W2794757234","https://openalex.org/W2808825929","https://openalex.org/W2903717232","https://openalex.org/W2904948956","https://openalex.org/W2917837049","https://openalex.org/W2954534637","https://openalex.org/W2979735877","https://openalex.org/W3016919608","https://openalex.org/W3201582780","https://openalex.org/W4240018668","https://openalex.org/W4240591052"],"related_works":["https://openalex.org/W2063372669","https://openalex.org/W4367592995","https://openalex.org/W2122511197","https://openalex.org/W2320652536","https://openalex.org/W2735130281","https://openalex.org/W3010779417","https://openalex.org/W2567051523","https://openalex.org/W4236372686","https://openalex.org/W3094401657","https://openalex.org/W2976393426"],"abstract_inverted_index":{"Non-deterministic":[0],"Finite":[1],"Automata":[2,189],"(NFA)":[3],"are":[4,139],"space-efficient":[5],"finite":[6],"state":[7],"machines":[8],"that":[9,81,152],"have":[10],"significant":[11,166],"applications":[12,193],"in":[13,55,82,88],"domains":[14],"such":[15],"as":[16],"pattern":[17],"matching":[18],"and":[19,60],"data":[20,53,76],"analytics.":[21],"In":[22],"this":[23,74],"paper,":[24],"we":[25,50,146],"investigate":[26],"why":[27],"the":[28,38,56,102,123,169,183,202,205],"Graphics":[29],"Processing":[30],"Unit":[31],"(GPU)---a":[32],"massively":[33],"parallel":[34],"computational":[35],"device":[36],"with":[37,157],"highest":[39],"memory":[40,58,70,95,105,159],"bandwidth":[41],"available":[42],"on":[43,141],"general-purpose":[44],"processors---cannot":[45],"efficiently":[46],"execute":[47],"NFAs.":[48,175],"First,":[49],"identify":[51],"excessive":[52,75],"movement":[54],"GPU":[57,137,172],"hierarchy":[59,71],"describe":[61],"how":[62],"to":[63,72,99,128,132,181],"privatize":[64],"reads":[65,96],"effectively":[66,153],"using":[67],"GPU's":[68],"on-chip":[69],"reduce":[73,101],"movement.":[77],"We":[78],"also":[79],"show":[80],"several":[83,192],"cases,":[84],"indirect":[85],"table":[86],"lookups":[87],"NFAs":[89,187],"can":[90],"be":[91],"eliminated":[92],"by":[93],"converting":[94],"into":[97],"computation,":[98],"further":[100],"number":[103],"of":[104,119,126,174,199,204],"reads.":[106],"Although":[107],"our":[108],"optimization":[109],"techniques":[110,121],"significantly":[111],"alleviate":[112],"these":[113,120],"memory-related":[114],"bottlenecks,":[115],"a":[116,148,165],"side":[117],"effect":[118],"is":[122],"static":[124],"assignment":[125],"work":[127],"cores.":[129],"This":[130],"leads":[131],"poor":[133],"compute":[134,155],"utilization,":[135],"where":[136],"cores":[138],"wasted":[140],"idle":[142],"NFA":[143],"states.":[144],"Therefore,":[145],"propose":[147],"new":[149],"dynamic":[150],"scheme":[151],"balances":[154],"utilization":[156],"reduced":[158],"usage.":[160],"Our":[161],"combined":[162],"optimizations":[163],"provide":[164],"improvement":[167],"over":[168],"previous":[170],"state-of-the-art":[171],"implementations":[173],"Moreover,":[176],"they":[177],"enable":[178],"current":[179],"GPUs":[180],"outperform":[182],"domain-specific":[184],"accelerator":[185],"for":[186,201],"(i.e.,":[188],"Processor)":[190],"across":[191],"while":[194],"performing":[195],"within":[196],"an":[197],"order":[198],"magnitude":[200],"rest":[203],"applications.":[206]},"counts_by_year":[{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":1}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
