{"id":"https://openalex.org/W1604443936","doi":"https://doi.org/10.3233/978-1-58603-975-2-23","title":"Finite-State Machines for Mining Patterns in Very Large Text Repositories","display_name":"Finite-State Machines for Mining Patterns in Very Large Text Repositories","publication_year":2009,"publication_date":"2009-01-01","ids":{"openalex":"https://openalex.org/W1604443936","doi":"https://doi.org/10.3233/978-1-58603-975-2-23","mag":"1604443936"},"language":"en","primary_location":{"id":"doi:10.3233/978-1-58603-975-2-23","is_oa":false,"landing_page_url":"https://doi.org/10.3233/978-1-58603-975-2-23","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5076228599","display_name":"Wojciech Skut","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Skut Wojciech","raw_affiliation_strings":["Google Inc., 1600 Amphitheatre Pkwy, Mountain View, CA, 94043, USA, wwskut@gmail.com#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google Inc., 1600 Amphitheatre Pkwy, Mountain View, CA, 94043, USA, wwskut@gmail.com#TAB#","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5076228599"],"corresponding_institution_ids":["https://openalex.org/I1291425158"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.13839849,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"23","last_page":"23"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9822999835014343,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9822999835014343,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9336000084877014,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9266999959945679,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/finite-state-machine","display_name":"Finite-state machine","score":0.6461079120635986},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.607162594795227},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5996339321136475},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4049299955368042},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.34314262866973877},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.1517033874988556}],"concepts":[{"id":"https://openalex.org/C167822520","wikidata":"https://www.wikidata.org/wiki/Q176452","display_name":"Finite-state machine","level":2,"score":0.6461079120635986},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.607162594795227},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5996339321136475},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4049299955368042},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.34314262866973877},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.1517033874988556}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3233/978-1-58603-975-2-23","is_oa":false,"landing_page_url":"https://doi.org/10.3233/978-1-58603-975-2-23","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},{"id":"mag:1604443936","is_oa":false,"landing_page_url":"https://dblp.uni-trier.de/db/conf/fsmnlp/fsmnlp2008.html#Skut08","pdf_url":null,"source":{"id":"https://openalex.org/S4306418424","display_name":"Finite-State Methods and Natural Language Processing","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":"Finite-State Methods and Natural Language Processing","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.699999988079071,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"The":[0,127],"emergence":[1],"of":[2,13,32,34,50,60,64,69,107,114,121],"WWW":[3],"search":[4],"engines":[5],"since":[6],"the":[7,11,58,61,65,92,96,122],"1990s":[8],"has":[9],"changed":[10],"scale":[12,59],"many":[14],"natural":[15],"language":[16],"processing":[17],"applications.":[18,56],"Text":[19],"mining,":[20],"information":[21],"extraction":[22],"and":[23,118],"related":[24],"tasks":[25],"can":[26],"now":[27],"be":[28,72,125],"applied":[29],"to":[30,71,79],"tens":[31],"billions":[33],"documents,":[35],"which":[36],"sets":[37],"new":[38],"efficiency":[39],"standards":[40],"for":[41,54,103],"NLP":[42],"algorithms.":[43],"Finite-state":[44],"machines":[45],"are":[46],"an":[47],"obvious":[48],"choice":[49],"a":[51,76,98,111],"formal":[52],"framework":[53],"such":[55],"However,":[57],"problem":[62,77],"(size":[63],"searchable":[66],"corpus,":[67],"number":[68],"patterns":[70,109],"matched)":[73],"often":[74],"poses":[75],"even":[78],"well-established":[80],"finite-state":[81,99],"string":[82],"matching":[83,100],"techniques.":[84],"In":[85],"my":[86],"presentation,":[87],"I":[88],"will":[89,124],"focus":[90],"on":[91,131],"experience":[93],"gained":[94],"in":[95,110],"implementation":[97],"library":[101,128],"optimized":[102],"searching":[104],"large":[105],"amounts":[106],"complex":[108],"WWW-scale":[112],"repository":[113],"documents.":[115],"Both":[116],"algorithmic":[117],"implementation-related":[119],"aspects":[120],"task":[123],"discussed.":[126],"is":[129],"based":[130],"OpenFST.":[132]},"counts_by_year":[{"year":2017,"cited_by_count":1},{"year":2013,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2016-06-24T00:00:00"}
