{"id":"https://openalex.org/W4221145058","doi":"https://doi.org/10.1162/tacl_a_00470","title":"Czech Grammar Error Correction with a Large and Diverse Corpus","display_name":"Czech Grammar Error Correction with a Large and Diverse Corpus","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4221145058","doi":"https://doi.org/10.1162/tacl_a_00470"},"language":"en","primary_location":{"id":"doi:10.1162/tacl_a_00470","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00470","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00470/2008050/tacl_a_00470.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00470/2008050/tacl_a_00470.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Jakub N\u00e1plava","orcid":null},"institutions":[{"id":"https://openalex.org/I21250087","display_name":"Charles University","ror":"https://ror.org/024d6js02","country_code":"CZ","type":"education","lineage":["https://openalex.org/I21250087"]}],"countries":["CZ"],"is_corresponding":true,"raw_author_name":"Jakub N\u00e1plava","raw_affiliation_strings":["Charles University, Faculty of Mathematics and Physics Institute of Formal and Applied Linguistics, Czech Republic. naplava@ufal.mff.cuni.cz"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Charles University, Faculty of Mathematics and Physics Institute of Formal and Applied Linguistics, Czech Republic. naplava@ufal.mff.cuni.cz","institution_ids":["https://openalex.org/I21250087"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Milan Straka","orcid":null},"institutions":[{"id":"https://openalex.org/I21250087","display_name":"Charles University","ror":"https://ror.org/024d6js02","country_code":"CZ","type":"education","lineage":["https://openalex.org/I21250087"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Milan Straka","raw_affiliation_strings":["Charles University, Faculty of Mathematics and Physics Institute of Formal and Applied Linguistics, Czech Republic. straka@ufal.mff.cuni.cz"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Charles University, Faculty of Mathematics and Physics Institute of Formal and Applied Linguistics, Czech Republic. straka@ufal.mff.cuni.cz","institution_ids":["https://openalex.org/I21250087"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jana Strakov\u00e1","orcid":null},"institutions":[{"id":"https://openalex.org/I21250087","display_name":"Charles University","ror":"https://ror.org/024d6js02","country_code":"CZ","type":"education","lineage":["https://openalex.org/I21250087"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Jana Strakov\u00e1","raw_affiliation_strings":["Charles University, Faculty of Mathematics and Physics Institute of Formal and Applied Linguistics, Czech Republic. strakova@ufal.mff.cuni.cz"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Charles University, Faculty of Mathematics and Physics Institute of Formal and Applied Linguistics, Czech Republic. strakova@ufal.mff.cuni.cz","institution_ids":["https://openalex.org/I21250087"]}]},{"author_position":"last","author":{"id":null,"display_name":"Alexandr Rosen","orcid":null},"institutions":[{"id":"https://openalex.org/I21250087","display_name":"Charles University","ror":"https://ror.org/024d6js02","country_code":"CZ","type":"education","lineage":["https://openalex.org/I21250087"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Alexandr Rosen","raw_affiliation_strings":["Charles University, Faculty of Arts Institute of Theoretical and Computational Linguistics, Czech Republic. alexandr.rosen@ff.cuni.cz"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Charles University, Faculty of Arts Institute of Theoretical and Computational Linguistics, Czech Republic. alexandr.rosen@ff.cuni.cz","institution_ids":["https://openalex.org/I21250087"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I21250087"],"apc_list":null,"apc_paid":null,"fwci":1.5276,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.85044268,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"10","issue":null,"first_page":"452","last_page":"467"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.8277999758720398,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.8277999758720398,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.11540000140666962,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.011699999682605267,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/czech","display_name":"Czech","score":0.9650999903678894},{"id":"https://openalex.org/keywords/grammar","display_name":"Grammar","score":0.6355999708175659},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.5719000101089478},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.5670999884605408},{"id":"https://openalex.org/keywords/error-detection-and-correction","display_name":"Error detection and correction","score":0.5091000199317932},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4237000048160553}],"concepts":[{"id":"https://openalex.org/C2777842544","wikidata":"https://www.wikidata.org/wiki/Q9056","display_name":"Czech","level":2,"score":0.9650999903678894},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8338000178337097},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6694999933242798},{"id":"https://openalex.org/C26022165","wikidata":"https://www.wikidata.org/wiki/Q8091","display_name":"Grammar","level":2,"score":0.6355999708175659},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6154000163078308},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.5719000101089478},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.5670999884605408},{"id":"https://openalex.org/C103088060","wikidata":"https://www.wikidata.org/wiki/Q1062839","display_name":"Error detection and correction","level":2,"score":0.5091000199317932},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4237000048160553},{"id":"https://openalex.org/C2780560020","wikidata":"https://www.wikidata.org/wiki/Q79719","display_name":"License","level":2,"score":0.4056999981403351},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.3273000121116638},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.31529998779296875},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3093000054359436},{"id":"https://openalex.org/C3018824978","wikidata":"https://www.wikidata.org/wiki/Q2894891","display_name":"Error analysis","level":2,"score":0.27619999647140503},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2590999901294708},{"id":"https://openalex.org/C2474386","wikidata":"https://www.wikidata.org/wiki/Q461183","display_name":"Text corpus","level":2,"score":0.25780001282691956},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2533999979496002}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1162/tacl_a_00470","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00470","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00470/2008050/tacl_a_00470.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2201.05590","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2201.05590","pdf_url":"https://arxiv.org/pdf/2201.05590","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:doaj.org/article:0d0a97cf5ac04f2cb89b0412ad40d708","is_oa":true,"landing_page_url":"https://doaj.org/article/0d0a97cf5ac04f2cb89b0412ad40d708","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Transactions of the Association for Computational Linguistics, Vol 10, Pp 452-467 (2022)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1162/tacl_a_00470","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00470","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00470/2008050/tacl_a_00470.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4221145058.pdf","grobid_xml":"https://content.openalex.org/works/W4221145058.grobid-xml"},"referenced_works_count":52,"referenced_works":["https://openalex.org/W1974790883","https://openalex.org/W2007321142","https://openalex.org/W2034669660","https://openalex.org/W2098297786","https://openalex.org/W2142384583","https://openalex.org/W2250591774","https://openalex.org/W2251927615","https://openalex.org/W2294498899","https://openalex.org/W2315316408","https://openalex.org/W2481467102","https://openalex.org/W2512924740","https://openalex.org/W2517375502","https://openalex.org/W2589277916","https://openalex.org/W2741494657","https://openalex.org/W2797885244","https://openalex.org/W2897820187","https://openalex.org/W2913917571","https://openalex.org/W2928406799","https://openalex.org/W2936597270","https://openalex.org/W2950737607","https://openalex.org/W2970076840","https://openalex.org/W2970868759","https://openalex.org/W2971319154","https://openalex.org/W2972799129","https://openalex.org/W2986388218","https://openalex.org/W2988390689","https://openalex.org/W3037162118","https://openalex.org/W3096648221","https://openalex.org/W3100590161","https://openalex.org/W3113952093","https://openalex.org/W3206475813","https://openalex.org/W6637548391","https://openalex.org/W6640808628","https://openalex.org/W6674756558","https://openalex.org/W6678346093","https://openalex.org/W6678481887","https://openalex.org/W6682421076","https://openalex.org/W6685309151","https://openalex.org/W6691177557","https://openalex.org/W6691239541","https://openalex.org/W6691271546","https://openalex.org/W6732188577","https://openalex.org/W6732482070","https://openalex.org/W6737833187","https://openalex.org/W6739901393","https://openalex.org/W6775935039","https://openalex.org/W6778618349","https://openalex.org/W6780455129","https://openalex.org/W6786299523","https://openalex.org/W6793501729","https://openalex.org/W6794413946","https://openalex.org/W6796449527"],"related_works":[],"abstract_inverted_index":{"Abstract":[0],"We":[1,73,102],"introduce":[2],"a":[3,43,84],"large":[4],"and":[5],"diverse":[6],"Czech":[7,40,76,106],"corpus":[8,108],"annotated":[9],"for":[10,29,39],"grammatical":[11],"error":[12,49,54],"correction":[13],"(GEC)":[14],"with":[15],"the":[16,21,104,112],"aim":[17],"to":[18,20,61,68,87],"contribute":[19],"still":[22],"scarce":[23],"data":[24],"resources":[25],"in":[26],"this":[27],"domain":[28],"languages":[30],"other":[31],"than":[32],"English.":[33],"The":[34],"Grammar":[35],"Error":[36],"Correction":[37],"Corpus":[38],"(GECCC)":[41],"offers":[42],"variety":[44],"of":[45],"four":[46],"domains,":[47],"covering":[48],"distributions":[50],"ranging":[51],"from":[52],"high":[53],"density":[55],"essays":[56],"written":[57],"by":[58],"non-native":[59],"speakers,":[60],"website":[62],"texts,":[63],"where":[64],"errors":[65],"are":[66],"expected":[67],"be":[69],"much":[70],"less":[71],"common.":[72],"compare":[74],"several":[75,80],"GEC":[77,94,107],"systems,":[78],"including":[79],"Transformer-based":[81],"ones,":[82],"setting":[83],"strong":[85],"baseline":[86],"future":[88],"research.":[89],"Finally,":[90],"we":[91],"meta-evaluate":[92],"common":[93],"metrics":[95],"against":[96],"human":[97],"judgments":[98],"on":[99],"our":[100],"data.":[101],"make":[103],"new":[105],"publicly":[109],"available":[110],"under":[111],"CC":[113],"BY-SA":[114],"4.0":[115],"license":[116],"at":[117],"http://hdl.handle.net/11234/1-4639.":[118]},"counts_by_year":[{"year":2025,"cited_by_count":6},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":1}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2022-04-03T00:00:00"}
