{"id":"https://openalex.org/W2136726363","doi":"https://doi.org/10.14778/1454159.1454245","title":"XML-document-filtering automaton","display_name":"XML-document-filtering automaton","publication_year":2008,"publication_date":"2008-08-01","ids":{"openalex":"https://openalex.org/W2136726363","doi":"https://doi.org/10.14778/1454159.1454245","mag":"2136726363"},"language":"en","primary_location":{"id":"doi:10.14778/1454159.1454245","is_oa":false,"landing_page_url":"https://doi.org/10.14778/1454159.1454245","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018726820","display_name":"Panu Silvasti","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Panu Silvasti","raw_affiliation_strings":["Helsinki University of Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Helsinki University of Technology","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007507561","display_name":"Seppo Sippu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Seppo Sippu","raw_affiliation_strings":[""],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5004458646","display_name":"Eljas Soisalon-Soininen","orcid":"https://orcid.org/0000-0001-6437-2127"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Eljas Soisalon-Soininen","raw_affiliation_strings":[""],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.7633,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.88283931,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":"1","issue":"2","first_page":"1666","last_page":"1671"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9866999983787537,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/xpath","display_name":"XPath","score":0.9201827049255371},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8067171573638916},{"id":"https://openalex.org/keywords/xml","display_name":"XML","score":0.5825634002685547},{"id":"https://openalex.org/keywords/streaming-xml","display_name":"Streaming XML","score":0.5640919208526611},{"id":"https://openalex.org/keywords/simple-api-for-xml","display_name":"Simple API for XML","score":0.5557380318641663},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.5199267268180847},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.5145106315612793},{"id":"https://openalex.org/keywords/automaton","display_name":"Automaton","score":0.4503917396068573},{"id":"https://openalex.org/keywords/regular-expression","display_name":"Regular expression","score":0.4412262439727783},{"id":"https://openalex.org/keywords/xml-database","display_name":"XML database","score":0.412431001663208},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3864271640777588},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.3650761544704437},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.35856032371520996},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.333002507686615},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.32099026441574097},{"id":"https://openalex.org/keywords/efficient-xml-interchange","display_name":"Efficient XML Interchange","score":0.2892242670059204},{"id":"https://openalex.org/keywords/xml-signature","display_name":"XML Signature","score":0.25868701934814453},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.0818968415260315}],"concepts":[{"id":"https://openalex.org/C2780213375","wikidata":"https://www.wikidata.org/wiki/Q16340","display_name":"XPath","level":4,"score":0.9201827049255371},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8067171573638916},{"id":"https://openalex.org/C8797682","wikidata":"https://www.wikidata.org/wiki/Q2115","display_name":"XML","level":2,"score":0.5825634002685547},{"id":"https://openalex.org/C44883583","wikidata":"https://www.wikidata.org/wiki/Q7622687","display_name":"Streaming XML","level":3,"score":0.5640919208526611},{"id":"https://openalex.org/C8595896","wikidata":"https://www.wikidata.org/wiki/Q577094","display_name":"Simple API for XML","level":5,"score":0.5557380318641663},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.5199267268180847},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.5145106315612793},{"id":"https://openalex.org/C112505250","wikidata":"https://www.wikidata.org/wiki/Q787116","display_name":"Automaton","level":2,"score":0.4503917396068573},{"id":"https://openalex.org/C121329065","wikidata":"https://www.wikidata.org/wiki/Q185612","display_name":"Regular expression","level":2,"score":0.4412262439727783},{"id":"https://openalex.org/C183068750","wikidata":"https://www.wikidata.org/wiki/Q357393","display_name":"XML database","level":3,"score":0.412431001663208},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3864271640777588},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3650761544704437},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.35856032371520996},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.333002507686615},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.32099026441574097},{"id":"https://openalex.org/C11508877","wikidata":"https://www.wikidata.org/wiki/Q1124477","display_name":"Efficient XML Interchange","level":3,"score":0.2892242670059204},{"id":"https://openalex.org/C34330436","wikidata":"https://www.wikidata.org/wiki/Q979532","display_name":"XML Signature","level":4,"score":0.25868701934814453},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0818968415260315}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.14778/1454159.1454245","is_oa":false,"landing_page_url":"https://doi.org/10.14778/1454159.1454245","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.140.8090","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.140.8090","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.vldb.org/pvldb/1/1454245.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.41999998688697815,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W1538131655","https://openalex.org/W1749554492","https://openalex.org/W1908950999","https://openalex.org/W1912713675","https://openalex.org/W1969783316","https://openalex.org/W1970659131","https://openalex.org/W1973190970","https://openalex.org/W2099290142","https://openalex.org/W2099964107","https://openalex.org/W2117020831","https://openalex.org/W2129097092","https://openalex.org/W2135611729","https://openalex.org/W2147339988","https://openalex.org/W3158104671"],"related_works":["https://openalex.org/W1597906172","https://openalex.org/W2146200818","https://openalex.org/W2465015636","https://openalex.org/W2298092871","https://openalex.org/W2096267046","https://openalex.org/W1525887267","https://openalex.org/W165852420","https://openalex.org/W207633616","https://openalex.org/W3129453902","https://openalex.org/W2096927233"],"abstract_inverted_index":{"In":[0,122],"a":[1,24,33,45,61,82,126,133,150,154],"publish-subscribe":[2],"system":[3,22],"based":[4,51],"on":[5,52],"filtering":[6],"of":[7,26,37,67,70,94,116,125,135,156],"XML":[8,27,77],"documents":[9,28,38,78],"subscribers":[10,32],"specify":[11],"their":[12],"interests":[13],"with":[14,153],"profiles":[15],"expressed":[16],"in":[17,64,90],"the":[18,41,53,65,68,71,76,88,91,95,107,123],"XPath":[19,108,111,118],"language.":[20],"The":[21,58,110],"processes":[23],"stream":[25],"and":[29,103,142],"delivers":[30],"to":[31,81,97],"notification":[34],"or":[35],"content":[36],"that":[39,49,75],"match":[40],"profiles.":[42],"We":[43,73],"present":[44],"new":[46],"XML-document-filtering":[47],"algorithm":[48,86],"is":[50],"classic":[54],"Aho-Corasick":[55],"pattern-matching":[56],"automaton.":[57],"automaton":[59,96],"has":[60],"size":[62],"linear":[63,117],"sum":[66],"sizes":[69],"filters.":[72,109],"assume":[74],"all":[79],"conform":[80],"given":[83],"DTD;":[84],"our":[85],"utilizes":[87],"DTD":[89],"preprocessing":[92],"phase":[93],"prune":[98],"out":[99],"descendant":[100],"axes":[101],"(//)":[102],"wildcards":[104],"(*)":[105],"from":[106],"subset":[112],"currently":[113],"supported":[114],"consists":[115],"expressions":[119],"without":[120],"predicates.":[121],"case":[124],"683":[127],"MB":[128],"protein-sequence":[129],"database,":[130],"we":[131],"obtained":[132],"throughput":[134,155],"18.8":[136],"MB/sec":[137,144],"for":[138,145],"50":[139],"000":[140,147],"filters":[141],"17.0":[143],"500":[146],"filters,":[148],"using":[149],"SAX":[151],"parser":[152],"27":[157],"MB/sec.":[158]},"counts_by_year":[{"year":2020,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
