{"id":"https://openalex.org/W3036809771","doi":"https://doi.org/10.1145/3399714","title":"Combining SIMD and Many/Multi-core Parallelism for Finite-state Machines with Enumerative Speculation","display_name":"Combining SIMD and Many/Multi-core Parallelism for Finite-state Machines with Enumerative Speculation","publication_year":2020,"publication_date":"2020-06-21","ids":{"openalex":"https://openalex.org/W3036809771","doi":"https://doi.org/10.1145/3399714","mag":"3036809771"},"language":"en","primary_location":{"id":"doi:10.1145/3399714","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3399714","pdf_url":null,"source":{"id":"https://openalex.org/S2483380313","display_name":"ACM Transactions on Parallel Computing","issn_l":"2329-4949","issn":["2329-4949","2329-4957"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Parallel Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5070002691","display_name":"Peng Jiang","orcid":"https://orcid.org/0000-0001-7743-6062"},"institutions":[{"id":"https://openalex.org/I126307644","display_name":"University of Iowa","ror":"https://ror.org/036jqmy94","country_code":"US","type":"education","lineage":["https://openalex.org/I126307644"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Peng Jiang","raw_affiliation_strings":["The University of Iowa, IA"],"affiliations":[{"raw_affiliation_string":"The University of Iowa, IA","institution_ids":["https://openalex.org/I126307644"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073745810","display_name":"Yang Xia","orcid":"https://orcid.org/0000-0002-3163-137X"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yang Xia","raw_affiliation_strings":["The Ohio State University"],"affiliations":[{"raw_affiliation_string":"The Ohio State University","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5025342178","display_name":"Gagan Agrawal","orcid":"https://orcid.org/0000-0002-2923-5327"},"institutions":[{"id":"https://openalex.org/I25041050","display_name":"Augusta University","ror":"https://ror.org/012mef835","country_code":"US","type":"education","lineage":["https://openalex.org/I25041050"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gagan Agrawal","raw_affiliation_strings":["Augusta University, GA"],"affiliations":[{"raw_affiliation_string":"Augusta University, GA","institution_ids":["https://openalex.org/I25041050"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5070002691"],"corresponding_institution_ids":["https://openalex.org/I126307644"],"apc_list":null,"apc_paid":null,"fwci":0.231,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.46283391,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"7","issue":"3","first_page":"1","last_page":"26"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12326","display_name":"Network Packet Processing and Optimization","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12326","display_name":"Network Packet Processing and Optimization","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8412355184555054},{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.7986679673194885},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7843334078788757},{"id":"https://openalex.org/keywords/speculation","display_name":"Speculation","score":0.5310357809066772},{"id":"https://openalex.org/keywords/regular-expression","display_name":"Regular expression","score":0.47473201155662537},{"id":"https://openalex.org/keywords/data-parallelism","display_name":"Data parallelism","score":0.415755033493042},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.3135806918144226},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.15187284350395203}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8412355184555054},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.7986679673194885},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7843334078788757},{"id":"https://openalex.org/C47941915","wikidata":"https://www.wikidata.org/wiki/Q107885","display_name":"Speculation","level":2,"score":0.5310357809066772},{"id":"https://openalex.org/C121329065","wikidata":"https://www.wikidata.org/wiki/Q185612","display_name":"Regular expression","level":2,"score":0.47473201155662537},{"id":"https://openalex.org/C61483411","wikidata":"https://www.wikidata.org/wiki/Q3124522","display_name":"Data parallelism","level":3,"score":0.415755033493042},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.3135806918144226},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.15187284350395203},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C139719470","wikidata":"https://www.wikidata.org/wiki/Q39680","display_name":"Macroeconomics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3399714","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3399714","pdf_url":null,"source":{"id":"https://openalex.org/S2483380313","display_name":"ACM Transactions on Parallel Computing","issn_l":"2329-4949","issn":["2329-4949","2329-4957"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Parallel Computing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W1525776788","https://openalex.org/W1967810725","https://openalex.org/W1977410509","https://openalex.org/W1981902599","https://openalex.org/W1985291160","https://openalex.org/W1987840949","https://openalex.org/W2039417226","https://openalex.org/W2040281526","https://openalex.org/W2104651752","https://openalex.org/W2110199304","https://openalex.org/W2123243337","https://openalex.org/W2132774949","https://openalex.org/W2132967231","https://openalex.org/W2141539412","https://openalex.org/W2143462372","https://openalex.org/W2289880787","https://openalex.org/W2412026823","https://openalex.org/W2507706987","https://openalex.org/W3009142854","https://openalex.org/W4230725352","https://openalex.org/W6676296441"],"related_works":["https://openalex.org/W2766828645","https://openalex.org/W2950520577","https://openalex.org/W1554644772","https://openalex.org/W2494130044","https://openalex.org/W2003935582","https://openalex.org/W3209384898","https://openalex.org/W74409296","https://openalex.org/W1991844655","https://openalex.org/W1595834484","https://openalex.org/W3170887803"],"abstract_inverted_index":{"Finite-state":[0],"Machine":[1],"(FSM)":[2],"is":[3,22],"the":[4,27,121,128,135,229],"key":[5],"kernel":[6],"behind":[7],"many":[8],"popular":[9,186],"applications,":[10],"including":[11,46],"regular":[12,191],"expression":[13,192],"matching,":[14,193],"text":[15],"tokenization,":[16,195],"and":[17,30,42,48,64,77,127,196,209,240,244,277],"Huffman":[18,189],"decoding.":[19],"Parallelizing":[20],"FSMs":[21],"extremely":[23],"difficult":[24],"because":[25],"of":[26,52,93,124,131,145,164,171,218,238,253],"strong":[28],"dependencies":[29],"unpredictable":[31],"memory":[32],"accesses.":[33],"Previous":[34],"efforts":[35],"have":[36,54,59,83,174],"largely":[37],"focused":[38],"on":[39,74,95,206,258,281],"multi-core":[40],"parallelization":[41],"used":[43],"different":[44],"approaches,":[45],"speculative":[47,101,232],"enumerative":[49,89,110,136,156,165,248],"execution,":[50,111],"both":[51],"which":[53],"been":[55],"effective":[56],"but":[57],"also":[58],"limitations.":[60],"With":[61],"increasing":[62],"width":[63],"improving":[65],"flexibility":[66],"in":[67,100,109,134,154,241],"SIMD":[68,76,205,214,243],"instruction":[69],"sets,":[70],"this":[71],"article":[72],"focuses":[73],"combining":[75,213,242],"many/multi-core":[78],"parallelism":[79],"for":[80],"FSMs.":[81],"We":[82,180,198],"developed":[84,175],"a":[85,96,143,168,176,224,259],"novel":[86],"strategy,":[87],"called":[88],"speculation":[90,125,151,166],".":[91],"Instead":[92],"speculating":[94],"single":[97,225],"state":[98],"as":[99,108],"execution":[102,233,249],"or":[103],"enumerating":[104],"all":[105],"possible":[106,118],"states":[107,147],"our":[112,155,182,264],"strategy":[113],"speculates":[114],"transitions":[115],"from":[116],"several":[117],"states,":[119],"reducing":[120],"prediction":[122],"overheads":[123],"approach":[126,141],"large":[129,169],"amount":[130],"redundant":[132],"work":[133],"approach.":[137],"A":[138],"simple":[139],"lookback":[140],"produces":[142],"set":[144],"guessed":[146],"to":[148,160,201,211],"achieve":[149,278],"high":[150],"success":[152],"rates":[153],"speculation.":[157],"In":[158],"addition,":[159],"enable":[161],"continued":[162],"scalability":[163,280],"with":[167,184,215],"number":[170],"threads,":[172],"we":[173,227,261],"parallel":[177,265],"merge":[178,266,275],"method.":[179],"evaluate":[181,257],"method":[183],"four":[185],"FSM":[187],"applications:":[188],"decoding,":[190],"HTML":[194],"Div7.":[197],"obtain":[199],"up":[200,210],"2.5\u00d7":[202],"speedup":[203],"using":[204],"1":[207],"core":[208],"95\u00d7":[212],"60":[216],"cores":[217],"an":[219,236,251,282],"Intel":[220],"Xeon":[221],"Phi.":[222],"On":[223],"core,":[226],"outperform":[228,247],"best":[230],"single-state":[231],"version":[234],"by":[235,250],"average":[237,252],"1.6\u00d7,":[239],"many-core":[245],"parallelism,":[246],"2\u00d7.":[254],"Finally,":[255],"when":[256],"GPU,":[260],"show":[262],"that":[263],"implementations":[267,276],"are":[268],"2.02--6.74\u00d7":[269],"more":[270],"efficient":[271],"than":[272],"corresponding":[273],"sequential":[274],"better":[279],"Nvidia":[283],"V100":[284],"GPU.":[285]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
