{"id":"https://openalex.org/W3011137863","doi":"https://doi.org/10.1007/s10579-020-09487-4","title":"Comparing web-crawled and traditional corpora","display_name":"Comparing web-crawled and traditional corpora","publication_year":2020,"publication_date":"2020-03-19","ids":{"openalex":"https://openalex.org/W3011137863","doi":"https://doi.org/10.1007/s10579-020-09487-4","mag":"3011137863"},"language":"en","primary_location":{"id":"doi:10.1007/s10579-020-09487-4","is_oa":false,"landing_page_url":"https://doi.org/10.1007/s10579-020-09487-4","pdf_url":null,"source":{"id":"https://openalex.org/S4306424877","display_name":"Language Resources and Evaluation","issn_l":"1574-020X","issn":["1574-020X","1574-0218"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Language Resources and Evaluation","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087449310","display_name":"V\u00e1clav Cvr\u010dek","orcid":"https://orcid.org/0000-0003-3977-2393"},"institutions":[{"id":"https://openalex.org/I21250087","display_name":"Charles University","ror":"https://ror.org/024d6js02","country_code":"CZ","type":"education","lineage":["https://openalex.org/I21250087"]}],"countries":["CZ"],"is_corresponding":true,"raw_author_name":"V\u00e1clav Cvr\u010dek","raw_affiliation_strings":["Institute of the Czech National Corpus, Faculty of Arts, Charles University, Prague, Czech Republic"],"raw_orcid":"https://orcid.org/0000-0003-3977-2393","affiliations":[{"raw_affiliation_string":"Institute of the Czech National Corpus, Faculty of Arts, Charles University, Prague, Czech Republic","institution_ids":["https://openalex.org/I21250087"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112472404","display_name":"Zuzana Komrskov\u00e1","orcid":null},"institutions":[{"id":"https://openalex.org/I21250087","display_name":"Charles University","ror":"https://ror.org/024d6js02","country_code":"CZ","type":"education","lineage":["https://openalex.org/I21250087"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Zuzana Komrskov\u00e1","raw_affiliation_strings":["Institute of the Czech National Corpus, Faculty of Arts, Charles University, Prague, Czech Republic"],"raw_orcid":"https://orcid.org/0000-0002-1170-9344","affiliations":[{"raw_affiliation_string":"Institute of the Czech National Corpus, Faculty of Arts, Charles University, Prague, Czech Republic","institution_ids":["https://openalex.org/I21250087"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015140593","display_name":"David Luke\u0161","orcid":"https://orcid.org/0000-0003-0429-6542"},"institutions":[{"id":"https://openalex.org/I21250087","display_name":"Charles University","ror":"https://ror.org/024d6js02","country_code":"CZ","type":"education","lineage":["https://openalex.org/I21250087"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"David Luke\u0161","raw_affiliation_strings":["Institute of the Czech National Corpus, Faculty of Arts, Charles University, Prague, Czech Republic"],"raw_orcid":"https://orcid.org/0000-0003-0429-6542","affiliations":[{"raw_affiliation_string":"Institute of the Czech National Corpus, Faculty of Arts, Charles University, Prague, Czech Republic","institution_ids":["https://openalex.org/I21250087"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035713275","display_name":"Petra Poukarov\u00e1","orcid":"https://orcid.org/0000-0003-3707-6466"},"institutions":[{"id":"https://openalex.org/I21250087","display_name":"Charles University","ror":"https://ror.org/024d6js02","country_code":"CZ","type":"education","lineage":["https://openalex.org/I21250087"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Petra Poukarov\u00e1","raw_affiliation_strings":["Institute of the Czech National Corpus, Faculty of Arts, Charles University, Prague, Czech Republic"],"raw_orcid":"https://orcid.org/0000-0003-3707-6466","affiliations":[{"raw_affiliation_string":"Institute of the Czech National Corpus, Faculty of Arts, Charles University, Prague, Czech Republic","institution_ids":["https://openalex.org/I21250087"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071310121","display_name":"Anna \u0158eho\u0159kov\u00e1","orcid":"https://orcid.org/0000-0002-6676-317X"},"institutions":[{"id":"https://openalex.org/I21250087","display_name":"Charles University","ror":"https://ror.org/024d6js02","country_code":"CZ","type":"education","lineage":["https://openalex.org/I21250087"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Anna \u0158eho\u0159kov\u00e1","raw_affiliation_strings":["Institute of the Czech National Corpus, Faculty of Arts, Charles University, Prague, Czech Republic"],"raw_orcid":"https://orcid.org/0000-0002-6676-317X","affiliations":[{"raw_affiliation_string":"Institute of the Czech National Corpus, Faculty of Arts, Charles University, Prague, Czech Republic","institution_ids":["https://openalex.org/I21250087"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005889125","display_name":"Adrian Jan Zasina","orcid":"https://orcid.org/0000-0001-9348-5833"},"institutions":[{"id":"https://openalex.org/I21250087","display_name":"Charles University","ror":"https://ror.org/024d6js02","country_code":"CZ","type":"education","lineage":["https://openalex.org/I21250087"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Adrian Jan Zasina","raw_affiliation_strings":["Institute of the Czech National Corpus, Faculty of Arts, Charles University, Prague, Czech Republic"],"raw_orcid":"https://orcid.org/0000-0001-9348-5833","affiliations":[{"raw_affiliation_string":"Institute of the Czech National Corpus, Faculty of Arts, Charles University, Prague, Czech Republic","institution_ids":["https://openalex.org/I21250087"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032717207","display_name":"\u0412\u043b\u0430\u0434\u0438\u043c\u0438\u0440 \u0411\u0435\u043d\u043a\u043e","orcid":"https://orcid.org/0000-0002-4600-5515"},"institutions":[{"id":"https://openalex.org/I207624831","display_name":"Slovak Academy of Sciences","ror":"https://ror.org/03h7qq074","country_code":"SK","type":"government","lineage":["https://openalex.org/I207624831"]},{"id":"https://openalex.org/I4210158485","display_name":"Ludovit Stur Institute of Linguistics of the Slovak Academy of Sciences","ror":"https://ror.org/0517k6d93","country_code":"SK","type":"facility","lineage":["https://openalex.org/I207624831","https://openalex.org/I4210158485"]},{"id":"https://openalex.org/I74788687","display_name":"Comenius University Bratislava","ror":"https://ror.org/0587ef340","country_code":"SK","type":"education","lineage":["https://openalex.org/I74788687"]}],"countries":["SK"],"is_corresponding":false,"raw_author_name":"Vladim\u00edr Benko","raw_affiliation_strings":["UNESCO Chair in Plurilingual and Multicultural Communication, Comenius University in Bratislava, Bratislava, Slovakia","\u013d. \u0160t\u00far Institute of Linguistics, Slovak Academy of Sciences, Bratislava, Slovakia"],"raw_orcid":"https://orcid.org/0000-0002-4600-5515","affiliations":[{"raw_affiliation_string":"UNESCO Chair in Plurilingual and Multicultural Communication, Comenius University in Bratislava, Bratislava, Slovakia","institution_ids":["https://openalex.org/I74788687"]},{"raw_affiliation_string":"\u013d. \u0160t\u00far Institute of Linguistics, Slovak Academy of Sciences, Bratislava, Slovakia","institution_ids":["https://openalex.org/I4210158485","https://openalex.org/I207624831"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5087449310"],"corresponding_institution_ids":["https://openalex.org/I21250087"],"apc_list":null,"apc_paid":null,"fwci":1.8232,"has_fulltext":false,"cited_by_count":18,"citation_normalized_percentile":{"value":0.84674752,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"54","issue":"3","first_page":"713","last_page":"745"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13155","display_name":"Digital Communication and Language","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13155","display_name":"Digital Communication and Language","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9912999868392944,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12353","display_name":"Lexicography and Language Studies","score":0.9905999898910522,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7608367204666138},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.7199888825416565},{"id":"https://openalex.org/keywords/variation","display_name":"Variation (astronomy)","score":0.6830347776412964},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.6508055925369263},{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.5896741151809692},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5627017617225647},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.5006272792816162},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4986724853515625},{"id":"https://openalex.org/keywords/czech","display_name":"Czech","score":0.4881506562232971},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.48035964369773865},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.4561339020729065},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3613075613975525},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.3113998770713806},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10276469588279724}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7608367204666138},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.7199888825416565},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.6830347776412964},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.6508055925369263},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.5896741151809692},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5627017617225647},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.5006272792816162},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4986724853515625},{"id":"https://openalex.org/C2777842544","wikidata":"https://www.wikidata.org/wiki/Q9056","display_name":"Czech","level":2,"score":0.4881506562232971},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.48035964369773865},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.4561339020729065},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3613075613975525},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.3113998770713806},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10276469588279724},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C44870925","wikidata":"https://www.wikidata.org/wiki/Q37547","display_name":"Astrophysics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s10579-020-09487-4","is_oa":false,"landing_page_url":"https://doi.org/10.1007/s10579-020-09487-4","pdf_url":null,"source":{"id":"https://openalex.org/S4306424877","display_name":"Language Resources and Evaluation","issn_l":"1574-020X","issn":["1574-020X","1574-0218"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Language Resources and Evaluation","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7599999904632568,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G175660507","display_name":null,"funder_award_id":"CZ.02.1.01/0.0/0.0/16_013/0001758","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"},{"id":"https://openalex.org/G3826902900","display_name":null,"funder_award_id":"2/0017/17","funder_id":"https://openalex.org/F4320323641","funder_display_name":"Vedeck\u00e1 Grantov\u00e1 Agent\u00fara M\u0160VVa\u0160 SR a SAV"},{"id":"https://openalex.org/G4327328590","display_name":null,"funder_award_id":"K-16-022-00","funder_id":"https://openalex.org/F4320323640","funder_display_name":"Kult\u00farna a Edukacn\u00e1 Grantov\u00e1 Agent\u00fara M\u0160VVa\u0160 SR"}],"funders":[{"id":"https://openalex.org/F4320323640","display_name":"Kult\u00farna a Edukacn\u00e1 Grantov\u00e1 Agent\u00fara M\u0160VVa\u0160 SR","ror":"https://ror.org/044gwpv05"},{"id":"https://openalex.org/F4320323641","display_name":"Vedeck\u00e1 Grantov\u00e1 Agent\u00fara M\u0160VVa\u0160 SR a SAV","ror":"https://ror.org/044gwpv05"},{"id":"https://openalex.org/F4320335322","display_name":"European Regional Development Fund","ror":"https://ror.org/00k4n6c32"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W61988534","https://openalex.org/W69408855","https://openalex.org/W76669076","https://openalex.org/W321015332","https://openalex.org/W1521846482","https://openalex.org/W1564586216","https://openalex.org/W1965026809","https://openalex.org/W1973866120","https://openalex.org/W1982697162","https://openalex.org/W2043054548","https://openalex.org/W2066651136","https://openalex.org/W2093585241","https://openalex.org/W2124966964","https://openalex.org/W2127589659","https://openalex.org/W2148259819","https://openalex.org/W2155870214","https://openalex.org/W2245292865","https://openalex.org/W2250752319","https://openalex.org/W2321170399","https://openalex.org/W2406566270","https://openalex.org/W2553216083","https://openalex.org/W2570920354","https://openalex.org/W2576768972","https://openalex.org/W2578181650","https://openalex.org/W2582743722","https://openalex.org/W2626189838","https://openalex.org/W2766079409","https://openalex.org/W2790796938","https://openalex.org/W2795582519","https://openalex.org/W2898216630","https://openalex.org/W3005783286","https://openalex.org/W3006993113","https://openalex.org/W3147425280","https://openalex.org/W3210183452","https://openalex.org/W4211148787","https://openalex.org/W6602789379","https://openalex.org/W6678927303","https://openalex.org/W6690495513","https://openalex.org/W6713670424","https://openalex.org/W6729878350","https://openalex.org/W6731765786","https://openalex.org/W6773989330","https://openalex.org/W7045799469"],"related_works":["https://openalex.org/W305958151","https://openalex.org/W2782410293","https://openalex.org/W2724504120","https://openalex.org/W88721864","https://openalex.org/W4360856886","https://openalex.org/W2614400517","https://openalex.org/W4378675964","https://openalex.org/W578757760","https://openalex.org/W2547738291","https://openalex.org/W2015724609"],"abstract_inverted_index":null,"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":2}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
