{"id":"https://openalex.org/W2170798490","doi":"https://doi.org/10.1145/2463676.2465289","title":"Improving regular-expression matching on strings using negative factors","display_name":"Improving regular-expression matching on strings using negative factors","publication_year":2013,"publication_date":"2013-06-22","ids":{"openalex":"https://openalex.org/W2170798490","doi":"https://doi.org/10.1145/2463676.2465289","mag":"2170798490"},"language":"en","primary_location":{"id":"doi:10.1145/2463676.2465289","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2463676.2465289","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2013 ACM SIGMOD International Conference on Management of Data","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5079763362","display_name":"Xiaochun Yang","orcid":"https://orcid.org/0000-0002-6184-4771"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xiaochun Yang","raw_affiliation_strings":["Northeastern University, Shenyang, China"],"affiliations":[{"raw_affiliation_string":"Northeastern University, Shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100602173","display_name":"Bin Wang","orcid":"https://orcid.org/0000-0002-5942-847X"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bin Wang","raw_affiliation_strings":["Northeastern University, Shenyang, China"],"affiliations":[{"raw_affiliation_string":"Northeastern University, Shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075593602","display_name":"Tao Qiu","orcid":"https://orcid.org/0000-0002-4406-5872"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tao Qiu","raw_affiliation_strings":["Northeastern University, Shenyang, China"],"affiliations":[{"raw_affiliation_string":"Northeastern University, Shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042198535","display_name":"Yaoshu Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yaoshu Wang","raw_affiliation_strings":["Northeastern University, Shenyang, China"],"affiliations":[{"raw_affiliation_string":"Northeastern University, Shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100369763","display_name":"Chen Li","orcid":"https://orcid.org/0000-0001-6249-8957"},"institutions":[{"id":"https://openalex.org/I4210137750","display_name":"UC Irvine Health","ror":"https://ror.org/03fgher32","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I4210137750"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chen Li","raw_affiliation_strings":["UC Irvine, Irvine, USA"],"affiliations":[{"raw_affiliation_string":"UC Irvine, Irvine, USA","institution_ids":["https://openalex.org/I4210137750"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5079763362"],"corresponding_institution_ids":["https://openalex.org/I9224756"],"apc_list":null,"apc_paid":null,"fwci":0.9885,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.83687815,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"361","last_page":"372"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12326","display_name":"Network Packet Processing and Optimization","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/substring","display_name":"Substring","score":0.941929817199707},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8038918972015381},{"id":"https://openalex.org/keywords/regular-expression","display_name":"Regular expression","score":0.7673276662826538},{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.7201236486434937},{"id":"https://openalex.org/keywords/string","display_name":"String (physics)","score":0.5433640480041504},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5365680456161499},{"id":"https://openalex.org/keywords/string-searching-algorithm","display_name":"String searching algorithm","score":0.5187740325927734},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.4986305236816406},{"id":"https://openalex.org/keywords/automaton","display_name":"Automaton","score":0.4748561382293701},{"id":"https://openalex.org/keywords/expression","display_name":"Expression (computer science)","score":0.47484996914863586},{"id":"https://openalex.org/keywords/pattern-matching","display_name":"Pattern matching","score":0.46357208490371704},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.4120680093765259},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.38582903146743774},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3491605520248413},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.32300126552581787},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.2665192782878876},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.1209394633769989},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10531795024871826}],"concepts":[{"id":"https://openalex.org/C182407805","wikidata":"https://www.wikidata.org/wiki/Q2626534","display_name":"Substring","level":3,"score":0.941929817199707},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8038918972015381},{"id":"https://openalex.org/C121329065","wikidata":"https://www.wikidata.org/wiki/Q185612","display_name":"Regular expression","level":2,"score":0.7673276662826538},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.7201236486434937},{"id":"https://openalex.org/C157486923","wikidata":"https://www.wikidata.org/wiki/Q1376436","display_name":"String (physics)","level":2,"score":0.5433640480041504},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5365680456161499},{"id":"https://openalex.org/C7757238","wikidata":"https://www.wikidata.org/wiki/Q374040","display_name":"String searching algorithm","level":3,"score":0.5187740325927734},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.4986305236816406},{"id":"https://openalex.org/C112505250","wikidata":"https://www.wikidata.org/wiki/Q787116","display_name":"Automaton","level":2,"score":0.4748561382293701},{"id":"https://openalex.org/C90559484","wikidata":"https://www.wikidata.org/wiki/Q778379","display_name":"Expression (computer science)","level":2,"score":0.47484996914863586},{"id":"https://openalex.org/C68859911","wikidata":"https://www.wikidata.org/wiki/Q1503724","display_name":"Pattern matching","level":2,"score":0.46357208490371704},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.4120680093765259},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.38582903146743774},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3491605520248413},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.32300126552581787},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.2665192782878876},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.1209394633769989},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10531795024871826},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C37914503","wikidata":"https://www.wikidata.org/wiki/Q156495","display_name":"Mathematical physics","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/2463676.2465289","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2463676.2465289","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2013 ACM SIGMOD International Conference on Management of Data","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.719.2442","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.719.2442","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://dbgroup.cs.tsinghua.edu.cn/dd/list/5.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W142858687","https://openalex.org/W1555212249","https://openalex.org/W1560450186","https://openalex.org/W1992020000","https://openalex.org/W2002089154","https://openalex.org/W2024029811","https://openalex.org/W2032377275","https://openalex.org/W2045821558","https://openalex.org/W2089302579","https://openalex.org/W2092587157","https://openalex.org/W2129713651","https://openalex.org/W2162481186","https://openalex.org/W2408583837","https://openalex.org/W2436553880","https://openalex.org/W2602645338","https://openalex.org/W4302339081","https://openalex.org/W6605783841","https://openalex.org/W6713384249"],"related_works":["https://openalex.org/W2902482624","https://openalex.org/W2902555313","https://openalex.org/W1492858093","https://openalex.org/W2376315482","https://openalex.org/W2092552144","https://openalex.org/W4249054680","https://openalex.org/W3112977702","https://openalex.org/W2218650848","https://openalex.org/W2366227422","https://openalex.org/W3216580934"],"abstract_inverted_index":{"The":[0],"problem":[1],"of":[2,5,39,87,110,167,179,189,223],"finding":[3,175],"matches":[4],"a":[6,11,64,107,176,185],"regular":[7],"expression":[8],"(RE)":[9],"on":[10,192],"string":[12],"exists":[13],"in":[14,33,81,136,153,213],"many":[15,51,97],"applications":[16],"such":[17],"as":[18],"text":[19,201,235],"editing,":[20],"biosequence":[21],"search,":[22],"and":[23,170,200,203],"shell":[24],"commands.":[25],"Existing":[26],"techniques":[27,45],"first":[28],"identify":[29],"candidates":[30],"using":[31,41,129],"substrings":[32,77],"the":[34,88,164,205,211,220,224],"RE,":[35],"then":[36,125],"verify":[37],"each":[38,154],"them":[40],"an":[42,82,115,172],"automaton.":[43],"These":[44],"become":[46],"inefficient":[47],"when":[48,143,209],"there":[49],"are":[50,76],"candidate":[52],"occurrences":[53],"that":[54,67,78,91,118,140,150],"need":[55],"to":[56,100,122,132,231],"be":[57,94],"verified.":[58],"In":[59],"this":[60,111,190],"paper":[61],"we":[62],"propose":[63],"novel":[65],"technique":[66,89,191,212],"prunes":[68],"false":[69],"negatives":[70],"by":[71,128,229],"utilizing":[72],"negative":[73,120,134,141,168,181],"factors,":[74,142,169],"which":[75],"cannot":[79],"appear":[80,152],"answer.":[83],"A":[84],"main":[85],"advantage":[86],"is":[90],"it":[92,127,218],"can":[93,156],"integrated":[95],"with":[96,146],"existing":[98,214],"algorithms":[99],"improve":[101,126],"their":[102],"efficiency":[103],"significantly.":[104],"We":[105,113,138,162,183],"give":[106],"full":[108],"specification":[109],"technique.":[112],"develop":[114,171],"efficient":[116],"algorithm":[117,173],"utilizes":[119],"factors":[121,135,148],"prune":[123],"candidates,":[124],"bit":[130],"operations":[131],"process":[133],"parallel.":[137],"show":[139,204],"used":[144],"together":[145],"necessary":[147],"(substrings":[149],"must":[151],"answer),":[155],"achieve":[157],"much":[158],"better":[159],"pruning":[160],"power.":[161],"analyze":[163],"large":[165],"number":[166,178],"for":[174,234],"small":[177],"high-quality":[180],"factors.":[182],"conducted":[184],"thorough":[186],"experimental":[187],"study":[188],"real":[193],"data":[194],"sets,":[195],"including":[196],"DNA":[197],"sequences,":[198],"proteins,":[199],"documents,":[202],"significant":[206],"performance":[207],"improvement":[208],"applying":[210],"algorithms.":[215],"For":[216],"instance,":[217],"improved":[219],"search":[221],"speed":[222],"popular":[225],"Gnu":[226],"Grep":[227],"tool":[228],"11":[230],"74":[232],"times":[233],"documents.":[236]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":2}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
