{"id":"https://openalex.org/W1570515258","doi":"https://doi.org/10.1007/978-3-642-23538-2_9","title":"Automatic Topic Identification for Large Scale Language Modeling Data Filtering","display_name":"Automatic Topic Identification for Large Scale Language Modeling Data Filtering","publication_year":2011,"publication_date":"2011-01-01","ids":{"openalex":"https://openalex.org/W1570515258","doi":"https://doi.org/10.1007/978-3-642-23538-2_9","mag":"1570515258"},"language":"en","primary_location":{"id":"doi:10.1007/978-3-642-23538-2_9","is_oa":false,"landing_page_url":"https://doi.org/10.1007/978-3-642-23538-2_9","pdf_url":null,"source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112005905","display_name":"Lucie Skorkovsk\u00e1","orcid":null},"institutions":[{"id":"https://openalex.org/I92715842","display_name":"University of West Bohemia in Pilsen","ror":"https://ror.org/040t43x18","country_code":"CZ","type":"education","lineage":["https://openalex.org/I92715842"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Lucie Skorkovsk\u00e1","raw_affiliation_strings":["Faculty of Applied Sciences, Dept. of Cybernetics, University of West Bohemia, Univerzitn\u00ed 8, 306 14, Plze\u0148, Czech Republic","University of West Bohemia, Faculty of Applied Sciences, Department of Cybernetics, Plze\u0148, Czech Republic"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Applied Sciences, Dept. of Cybernetics, University of West Bohemia, Univerzitn\u00ed 8, 306 14, Plze\u0148, Czech Republic","institution_ids":["https://openalex.org/I92715842"]},{"raw_affiliation_string":"University of West Bohemia, Faculty of Applied Sciences, Department of Cybernetics, Plze\u0148, Czech Republic","institution_ids":["https://openalex.org/I92715842"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047784245","display_name":"Pavel Ircing","orcid":"https://orcid.org/0000-0001-6967-1687"},"institutions":[{"id":"https://openalex.org/I92715842","display_name":"University of West Bohemia in Pilsen","ror":"https://ror.org/040t43x18","country_code":"CZ","type":"education","lineage":["https://openalex.org/I92715842"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Pavel Ircing","raw_affiliation_strings":["Faculty of Applied Sciences, Dept. of Cybernetics, University of West Bohemia, Univerzitn\u00ed 8, 306 14, Plze\u0148, Czech Republic","University of West Bohemia, Faculty of Applied Sciences, Department of Cybernetics, Plze\u0148, Czech Republic"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Applied Sciences, Dept. of Cybernetics, University of West Bohemia, Univerzitn\u00ed 8, 306 14, Plze\u0148, Czech Republic","institution_ids":["https://openalex.org/I92715842"]},{"raw_affiliation_string":"University of West Bohemia, Faculty of Applied Sciences, Department of Cybernetics, Plze\u0148, Czech Republic","institution_ids":["https://openalex.org/I92715842"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010512605","display_name":"Ale\u0161 Pra\u017e\u00e1k","orcid":"https://orcid.org/0000-0001-9453-0034"},"institutions":[{"id":"https://openalex.org/I92715842","display_name":"University of West Bohemia in Pilsen","ror":"https://ror.org/040t43x18","country_code":"CZ","type":"education","lineage":["https://openalex.org/I92715842"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Ale\u0161 Pra\u017e\u00e1k","raw_affiliation_strings":["Faculty of Applied Sciences, Dept. of Cybernetics, University of West Bohemia, Univerzitn\u00ed 8, 306 14, Plze\u0148, Czech Republic","University of West Bohemia, Faculty of Applied Sciences, Department of Cybernetics, Plze\u0148, Czech Republic"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Applied Sciences, Dept. of Cybernetics, University of West Bohemia, Univerzitn\u00ed 8, 306 14, Plze\u0148, Czech Republic","institution_ids":["https://openalex.org/I92715842"]},{"raw_affiliation_string":"University of West Bohemia, Faculty of Applied Sciences, Department of Cybernetics, Plze\u0148, Czech Republic","institution_ids":["https://openalex.org/I92715842"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5085434299","display_name":"Jan Lehe\u010dka","orcid":"https://orcid.org/0000-0002-3889-8069"},"institutions":[{"id":"https://openalex.org/I92715842","display_name":"University of West Bohemia in Pilsen","ror":"https://ror.org/040t43x18","country_code":"CZ","type":"education","lineage":["https://openalex.org/I92715842"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Jan Lehe\u010dka","raw_affiliation_strings":["Faculty of Applied Sciences, Dept. of Cybernetics, University of West Bohemia, Univerzitn\u00ed 8, 306 14, Plze\u0148, Czech Republic","University of West Bohemia, Faculty of Applied Sciences, Department of Cybernetics, Plze\u0148, Czech Republic"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Applied Sciences, Dept. of Cybernetics, University of West Bohemia, Univerzitn\u00ed 8, 306 14, Plze\u0148, Czech Republic","institution_ids":["https://openalex.org/I92715842"]},{"raw_affiliation_string":"University of West Bohemia, Faculty of Applied Sciences, Department of Cybernetics, Plze\u0148, Czech Republic","institution_ids":["https://openalex.org/I92715842"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":5000,"currency":"EUR","value_usd":5392},"apc_paid":null,"fwci":3.7271,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.93675865,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"64","last_page":"71"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.915442168712616},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.7484323978424072},{"id":"https://openalex.org/keywords/hierarchy","display_name":"Hierarchy","score":0.6061144471168518},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5407945513725281},{"id":"https://openalex.org/keywords/precision-and-recall","display_name":"Precision and recall","score":0.5329948663711548},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5087035298347473},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.47897958755493164},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4623473882675171},{"id":"https://openalex.org/keywords/language-identification","display_name":"Language identification","score":0.43419957160949707},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.43079429864883423},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.21886172890663147}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.915442168712616},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.7484323978424072},{"id":"https://openalex.org/C31170391","wikidata":"https://www.wikidata.org/wiki/Q188619","display_name":"Hierarchy","level":2,"score":0.6061144471168518},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5407945513725281},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.5329948663711548},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5087035298347473},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.47897958755493164},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4623473882675171},{"id":"https://openalex.org/C129792486","wikidata":"https://www.wikidata.org/wiki/Q1050419","display_name":"Language identification","level":3,"score":0.43419957160949707},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.43079429864883423},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.21886172890663147},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C34447519","wikidata":"https://www.wikidata.org/wiki/Q179522","display_name":"Market economy","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1007/978-3-642-23538-2_9","is_oa":false,"landing_page_url":"https://doi.org/10.1007/978-3-642-23538-2_9","pdf_url":null,"source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"},{"id":"pmh:oai:dspace5.zcu.cz:11025/16984","is_oa":false,"landing_page_url":"http://hdl.handle.net/11025/16984","pdf_url":null,"source":{"id":"https://openalex.org/S4306400586","display_name":"Digital Library (University of West Bohemia)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I92715842","host_organization_name":"University of West Bohemia in Pilsen","host_organization_lineage":["https://openalex.org/I92715842"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"\u010dl\u00e1nek"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7599999904632568,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W1516078819","https://openalex.org/W1523989137","https://openalex.org/W1532325895","https://openalex.org/W1563905377","https://openalex.org/W1596859431","https://openalex.org/W1631260214","https://openalex.org/W1817992165","https://openalex.org/W2139545398","https://openalex.org/W2149684865","https://openalex.org/W2505312721","https://openalex.org/W4213009331"],"related_works":["https://openalex.org/W2365264209","https://openalex.org/W962203960","https://openalex.org/W2026999166","https://openalex.org/W3213549959","https://openalex.org/W3129739276","https://openalex.org/W3108387573","https://openalex.org/W2924380321","https://openalex.org/W3082797515","https://openalex.org/W2271356425","https://openalex.org/W4383616786"],"abstract_inverted_index":null,"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2015,"cited_by_count":5},{"year":2014,"cited_by_count":3},{"year":2013,"cited_by_count":3},{"year":2012,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
