{"id":"https://openalex.org/W1997258539","doi":"https://doi.org/10.1007/s10579-013-9246-z","title":"General framework for mining, processing and storing large amounts of electronic texts for language modeling purposes","display_name":"General framework for mining, processing and storing large amounts of electronic texts for language modeling purposes","publication_year":2013,"publication_date":"2013-07-23","ids":{"openalex":"https://openalex.org/W1997258539","doi":"https://doi.org/10.1007/s10579-013-9246-z","mag":"1997258539"},"language":"en","primary_location":{"id":"doi:10.1007/s10579-013-9246-z","is_oa":false,"landing_page_url":"https://doi.org/10.1007/s10579-013-9246-z","pdf_url":null,"source":{"id":"https://openalex.org/S4306424877","display_name":"Language Resources and Evaluation","issn_l":"1574-020X","issn":["1574-020X","1574-0218"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Language Resources and Evaluation","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072542443","display_name":"J\u00e1n \u0160vec","orcid":"https://orcid.org/0000-0001-8362-5927"},"institutions":[{"id":"https://openalex.org/I92715842","display_name":"University of West Bohemia in Pilsen","ror":"https://ror.org/040t43x18","country_code":"CZ","type":"education","lineage":["https://openalex.org/I92715842"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Jan \u0160vec","raw_affiliation_strings":["Department of Cybernetics, Faculty of Applied Sciences, University of West Bohemia, Univerzitn\u00ed 8, 306 14, Plze\u0148, Czech Republic","Department of Cybernetics, Faculty of Applied Sciences, University of West Bohemia, Plze\u0148, Czech Republic 306 14#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Cybernetics, Faculty of Applied Sciences, University of West Bohemia, Univerzitn\u00ed 8, 306 14, Plze\u0148, Czech Republic","institution_ids":["https://openalex.org/I92715842"]},{"raw_affiliation_string":"Department of Cybernetics, Faculty of Applied Sciences, University of West Bohemia, Plze\u0148, Czech Republic 306 14#TAB#","institution_ids":["https://openalex.org/I92715842"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085434299","display_name":"Jan Lehe\u010dka","orcid":"https://orcid.org/0000-0002-3889-8069"},"institutions":[{"id":"https://openalex.org/I92715842","display_name":"University of West Bohemia in Pilsen","ror":"https://ror.org/040t43x18","country_code":"CZ","type":"education","lineage":["https://openalex.org/I92715842"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Jan Lehe\u010dka","raw_affiliation_strings":["Department of Cybernetics, Faculty of Applied Sciences, University of West Bohemia, Univerzitn\u00ed 8, 306 14, Plze\u0148, Czech Republic","Department of Cybernetics, Faculty of Applied Sciences, University of West Bohemia, Plze\u0148, Czech Republic 306 14#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Cybernetics, Faculty of Applied Sciences, University of West Bohemia, Univerzitn\u00ed 8, 306 14, Plze\u0148, Czech Republic","institution_ids":["https://openalex.org/I92715842"]},{"raw_affiliation_string":"Department of Cybernetics, Faculty of Applied Sciences, University of West Bohemia, Plze\u0148, Czech Republic 306 14#TAB#","institution_ids":["https://openalex.org/I92715842"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047784245","display_name":"Pavel Ircing","orcid":"https://orcid.org/0000-0001-6967-1687"},"institutions":[{"id":"https://openalex.org/I92715842","display_name":"University of West Bohemia in Pilsen","ror":"https://ror.org/040t43x18","country_code":"CZ","type":"education","lineage":["https://openalex.org/I92715842"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Pavel Ircing","raw_affiliation_strings":["Department of Cybernetics, Faculty of Applied Sciences, University of West Bohemia, Univerzitn\u00ed 8, 306 14, Plze\u0148, Czech Republic","Department of Cybernetics, Faculty of Applied Sciences, University of West Bohemia, Plze\u0148, Czech Republic 306 14#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Cybernetics, Faculty of Applied Sciences, University of West Bohemia, Univerzitn\u00ed 8, 306 14, Plze\u0148, Czech Republic","institution_ids":["https://openalex.org/I92715842"]},{"raw_affiliation_string":"Department of Cybernetics, Faculty of Applied Sciences, University of West Bohemia, Plze\u0148, Czech Republic 306 14#TAB#","institution_ids":["https://openalex.org/I92715842"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112005905","display_name":"Lucie Skorkovsk\u00e1","orcid":null},"institutions":[{"id":"https://openalex.org/I92715842","display_name":"University of West Bohemia in Pilsen","ror":"https://ror.org/040t43x18","country_code":"CZ","type":"education","lineage":["https://openalex.org/I92715842"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Lucie Skorkovsk\u00e1","raw_affiliation_strings":["Department of Cybernetics, Faculty of Applied Sciences, University of West Bohemia, Univerzitn\u00ed 8, 306 14, Plze\u0148, Czech Republic","Department of Cybernetics, Faculty of Applied Sciences, University of West Bohemia, Plze\u0148, Czech Republic 306 14#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Cybernetics, Faculty of Applied Sciences, University of West Bohemia, Univerzitn\u00ed 8, 306 14, Plze\u0148, Czech Republic","institution_ids":["https://openalex.org/I92715842"]},{"raw_affiliation_string":"Department of Cybernetics, Faculty of Applied Sciences, University of West Bohemia, Plze\u0148, Czech Republic 306 14#TAB#","institution_ids":["https://openalex.org/I92715842"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010512605","display_name":"Ale\u0161 Pra\u017e\u00e1k","orcid":"https://orcid.org/0000-0001-9453-0034"},"institutions":[{"id":"https://openalex.org/I92715842","display_name":"University of West Bohemia in Pilsen","ror":"https://ror.org/040t43x18","country_code":"CZ","type":"education","lineage":["https://openalex.org/I92715842"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Ale\u0161 Pra\u017e\u00e1k","raw_affiliation_strings":["Department of Cybernetics, Faculty of Applied Sciences, University of West Bohemia, Univerzitn\u00ed 8, 306 14, Plze\u0148, Czech Republic","Department of Cybernetics, Faculty of Applied Sciences, University of West Bohemia, Plze\u0148, Czech Republic 306 14#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Cybernetics, Faculty of Applied Sciences, University of West Bohemia, Univerzitn\u00ed 8, 306 14, Plze\u0148, Czech Republic","institution_ids":["https://openalex.org/I92715842"]},{"raw_affiliation_string":"Department of Cybernetics, Faculty of Applied Sciences, University of West Bohemia, Plze\u0148, Czech Republic 306 14#TAB#","institution_ids":["https://openalex.org/I92715842"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035947906","display_name":"Jan Vavru\u0161ka","orcid":"https://orcid.org/0000-0001-7713-3950"},"institutions":[{"id":"https://openalex.org/I92715842","display_name":"University of West Bohemia in Pilsen","ror":"https://ror.org/040t43x18","country_code":"CZ","type":"education","lineage":["https://openalex.org/I92715842"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Jan Vavru\u0161ka","raw_affiliation_strings":["Department of Cybernetics, Faculty of Applied Sciences, University of West Bohemia, Univerzitn\u00ed 8, 306 14, Plze\u0148, Czech Republic","Department of Cybernetics, Faculty of Applied Sciences, University of West Bohemia, Plze\u0148, Czech Republic 306 14#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Cybernetics, Faculty of Applied Sciences, University of West Bohemia, Univerzitn\u00ed 8, 306 14, Plze\u0148, Czech Republic","institution_ids":["https://openalex.org/I92715842"]},{"raw_affiliation_string":"Department of Cybernetics, Faculty of Applied Sciences, University of West Bohemia, Plze\u0148, Czech Republic 306 14#TAB#","institution_ids":["https://openalex.org/I92715842"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113797690","display_name":"Petr Stanislav","orcid":null},"institutions":[{"id":"https://openalex.org/I92715842","display_name":"University of West Bohemia in Pilsen","ror":"https://ror.org/040t43x18","country_code":"CZ","type":"education","lineage":["https://openalex.org/I92715842"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Petr Stanislav","raw_affiliation_strings":["Department of Cybernetics, Faculty of Applied Sciences, University of West Bohemia, Univerzitn\u00ed 8, 306 14, Plze\u0148, Czech Republic","Department of Cybernetics, Faculty of Applied Sciences, University of West Bohemia, Plze\u0148, Czech Republic 306 14#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Cybernetics, Faculty of Applied Sciences, University of West Bohemia, Univerzitn\u00ed 8, 306 14, Plze\u0148, Czech Republic","institution_ids":["https://openalex.org/I92715842"]},{"raw_affiliation_string":"Department of Cybernetics, Faculty of Applied Sciences, University of West Bohemia, Plze\u0148, Czech Republic 306 14#TAB#","institution_ids":["https://openalex.org/I92715842"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032416308","display_name":"Jan Hoidekr","orcid":null},"institutions":[{"id":"https://openalex.org/I92715842","display_name":"University of West Bohemia in Pilsen","ror":"https://ror.org/040t43x18","country_code":"CZ","type":"education","lineage":["https://openalex.org/I92715842"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Jan Hoidekr","raw_affiliation_strings":["Department of Cybernetics, Faculty of Applied Sciences, University of West Bohemia, Univerzitn\u00ed 8, 306 14, Plze\u0148, Czech Republic","Department of Cybernetics, Faculty of Applied Sciences, University of West Bohemia, Plze\u0148, Czech Republic 306 14#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Cybernetics, Faculty of Applied Sciences, University of West Bohemia, Univerzitn\u00ed 8, 306 14, Plze\u0148, Czech Republic","institution_ids":["https://openalex.org/I92715842"]},{"raw_affiliation_string":"Department of Cybernetics, Faculty of Applied Sciences, University of West Bohemia, Plze\u0148, Czech Republic 306 14#TAB#","institution_ids":["https://openalex.org/I92715842"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":13,"citation_normalized_percentile":{"value":0.0718002,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"48","issue":"2","first_page":"227","last_page":"248"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8679025173187256},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.6011431813240051},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5867374539375305},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5638641715049744},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.5264031887054443},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4817124009132385},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.46219828724861145},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4559711515903473},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4327791631221771},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3473421037197113},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.1544192135334015}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8679025173187256},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.6011431813240051},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5867374539375305},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5638641715049744},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.5264031887054443},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4817124009132385},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.46219828724861145},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4559711515903473},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4327791631221771},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3473421037197113},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.1544192135334015},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s10579-013-9246-z","is_oa":false,"landing_page_url":"https://doi.org/10.1007/s10579-013-9246-z","pdf_url":null,"source":{"id":"https://openalex.org/S4306424877","display_name":"Language Resources and Evaluation","issn_l":"1574-020X","issn":["1574-020X","1574-0218"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Language Resources and Evaluation","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.7200000286102295}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W26772505","https://openalex.org/W69408855","https://openalex.org/W84079877","https://openalex.org/W146067141","https://openalex.org/W1495542223","https://openalex.org/W1511655776","https://openalex.org/W1516078819","https://openalex.org/W1523989137","https://openalex.org/W1532325895","https://openalex.org/W1542786493","https://openalex.org/W1563905377","https://openalex.org/W1596859431","https://openalex.org/W1631260214","https://openalex.org/W1811872306","https://openalex.org/W1817992165","https://openalex.org/W1882516017","https://openalex.org/W1992952585","https://openalex.org/W2057208313","https://openalex.org/W2075310404","https://openalex.org/W2136780519","https://openalex.org/W2139545398","https://openalex.org/W2144588880","https://openalex.org/W2152565070","https://openalex.org/W2295000399","https://openalex.org/W2550419310","https://openalex.org/W2578576916","https://openalex.org/W4211148787","https://openalex.org/W4213009331","https://openalex.org/W6602789379","https://openalex.org/W6603373351","https://openalex.org/W6980748788"],"related_works":["https://openalex.org/W1603736412","https://openalex.org/W4304185162","https://openalex.org/W2061685118","https://openalex.org/W3006282800","https://openalex.org/W2462100143","https://openalex.org/W1770503332","https://openalex.org/W3125207769","https://openalex.org/W2577825108","https://openalex.org/W2169518243","https://openalex.org/W2086008440"],"abstract_inverted_index":null,"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
