{"id":"https://openalex.org/W4388651588","doi":"https://doi.org/10.3390/data8110170","title":"Introducing DeReKoGram: A Novel Frequency Dataset with Lemma and Part-of-Speech Information for German","display_name":"Introducing DeReKoGram: A Novel Frequency Dataset with Lemma and Part-of-Speech Information for German","publication_year":2023,"publication_date":"2023-11-10","ids":{"openalex":"https://openalex.org/W4388651588","doi":"https://doi.org/10.3390/data8110170"},"language":"en","primary_location":{"id":"doi:10.3390/data8110170","is_oa":true,"landing_page_url":"https://doi.org/10.3390/data8110170","pdf_url":"https://www.mdpi.com/2306-5729/8/11/170/pdf?version=1699619637","source":{"id":"https://openalex.org/S4210226510","display_name":"Data","issn_l":"2306-5729","issn":["2306-5729"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2306-5729/8/11/170/pdf?version=1699619637","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047741907","display_name":"Sascha Wolfer","orcid":"https://orcid.org/0000-0002-8893-8153"},"institutions":[{"id":"https://openalex.org/I4210099471","display_name":"Leibniz Institute for the German Language","ror":"https://ror.org/00hvwkt50","country_code":"DE","type":"facility","lineage":["https://openalex.org/I315704651","https://openalex.org/I4210099471"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Sascha Wolfer","raw_affiliation_strings":["Leibniz Institute for the German Language (IDS), 68161 Mannheim, Germany"],"raw_orcid":"https://orcid.org/0000-0002-8893-8153","affiliations":[{"raw_affiliation_string":"Leibniz Institute for the German Language (IDS), 68161 Mannheim, Germany","institution_ids":["https://openalex.org/I4210099471"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062785200","display_name":"Alexander Koplenig","orcid":"https://orcid.org/0000-0002-9630-9680"},"institutions":[{"id":"https://openalex.org/I4210099471","display_name":"Leibniz Institute for the German Language","ror":"https://ror.org/00hvwkt50","country_code":"DE","type":"facility","lineage":["https://openalex.org/I315704651","https://openalex.org/I4210099471"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Alexander Koplenig","raw_affiliation_strings":["Leibniz Institute for the German Language (IDS), 68161 Mannheim, Germany"],"raw_orcid":"https://orcid.org/0000-0002-9630-9680","affiliations":[{"raw_affiliation_string":"Leibniz Institute for the German Language (IDS), 68161 Mannheim, Germany","institution_ids":["https://openalex.org/I4210099471"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065768650","display_name":"Marc Kupietz","orcid":"https://orcid.org/0000-0001-8997-8256"},"institutions":[{"id":"https://openalex.org/I4210099471","display_name":"Leibniz Institute for the German Language","ror":"https://ror.org/00hvwkt50","country_code":"DE","type":"facility","lineage":["https://openalex.org/I315704651","https://openalex.org/I4210099471"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Marc Kupietz","raw_affiliation_strings":["Leibniz Institute for the German Language (IDS), 68161 Mannheim, Germany"],"raw_orcid":"https://orcid.org/0000-0001-8997-8256","affiliations":[{"raw_affiliation_string":"Leibniz Institute for the German Language (IDS), 68161 Mannheim, Germany","institution_ids":["https://openalex.org/I4210099471"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038182210","display_name":"Carolin M\u00fcller-Spitzer","orcid":"https://orcid.org/0000-0002-5690-7774"},"institutions":[{"id":"https://openalex.org/I4210099471","display_name":"Leibniz Institute for the German Language","ror":"https://ror.org/00hvwkt50","country_code":"DE","type":"facility","lineage":["https://openalex.org/I315704651","https://openalex.org/I4210099471"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Carolin M\u00fcller-Spitzer","raw_affiliation_strings":["Leibniz Institute for the German Language (IDS), 68161 Mannheim, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Leibniz Institute for the German Language (IDS), 68161 Mannheim, Germany","institution_ids":["https://openalex.org/I4210099471"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5047741907"],"corresponding_institution_ids":["https://openalex.org/I4210099471"],"apc_list":{"value":1600,"currency":"CHF","value_usd":1732},"apc_paid":{"value":1658,"currency":"EUR","value_usd":1788},"fwci":0.3408,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.66923976,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":"8","issue":"11","first_page":"170","last_page":"170"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12090","display_name":"Language and cultural evolution","score":0.9929999709129333,"subfield":{"id":"https://openalex.org/subfields/3316","display_name":"Cultural Studies"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9794999957084656,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/lemma","display_name":"Lemma (botany)","score":0.8315472602844238},{"id":"https://openalex.org/keywords/german","display_name":"German","score":0.7584257125854492},{"id":"https://openalex.org/keywords/python","display_name":"Python (programming language)","score":0.7134978771209717},{"id":"https://openalex.org/keywords/scripting-language","display_name":"Scripting language","score":0.6630945801734924},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.630628228187561},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.5863357186317444},{"id":"https://openalex.org/keywords/lemmatisation","display_name":"Lemmatisation","score":0.5367423892021179},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.432218074798584},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42408889532089233},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.139797180891037},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.06989225745201111}],"concepts":[{"id":"https://openalex.org/C2777759810","wikidata":"https://www.wikidata.org/wiki/Q149316","display_name":"Lemma (botany)","level":3,"score":0.8315472602844238},{"id":"https://openalex.org/C154775046","wikidata":"https://www.wikidata.org/wiki/Q188","display_name":"German","level":2,"score":0.7584257125854492},{"id":"https://openalex.org/C519991488","wikidata":"https://www.wikidata.org/wiki/Q28865","display_name":"Python (programming language)","level":2,"score":0.7134978771209717},{"id":"https://openalex.org/C61423126","wikidata":"https://www.wikidata.org/wiki/Q187432","display_name":"Scripting language","level":2,"score":0.6630945801734924},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.630628228187561},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.5863357186317444},{"id":"https://openalex.org/C161831844","wikidata":"https://www.wikidata.org/wiki/Q2554325","display_name":"Lemmatisation","level":2,"score":0.5367423892021179},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.432218074798584},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42408889532089233},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.139797180891037},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.06989225745201111},{"id":"https://openalex.org/C46757340","wikidata":"https://www.wikidata.org/wiki/Q43238","display_name":"Poaceae","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.3390/data8110170","is_oa":true,"landing_page_url":"https://doi.org/10.3390/data8110170","pdf_url":"https://www.mdpi.com/2306-5729/8/11/170/pdf?version=1699619637","source":{"id":"https://openalex.org/S4210226510","display_name":"Data","issn_l":"2306-5729","issn":["2306-5729"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data","raw_type":"journal-article"},{"id":"pmh:oai:ids-pub.bsz-bw.de:12293","is_oa":true,"landing_page_url":"https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/12293","pdf_url":"https://ids-pub.bsz-bw.de/files/12293/Wolfer_Koplenig_Kupietz_Introducing_DeReKoGram_2023.pdf","source":{"id":"https://openalex.org/S4306401750","display_name":"Publication Server of the Institute for German Language (Institute for German Language)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210099471","host_organization_name":"Leibniz Institute for the German Language","host_organization_lineage":["https://openalex.org/I4210099471"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"article"},{"id":"pmh:oai:doaj.org/article:dc7024147ee8482ca8ab9949e102628e","is_oa":true,"landing_page_url":"https://doaj.org/article/dc7024147ee8482ca8ab9949e102628e","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Data, Vol 8, Iss 11, p 170 (2023)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/data8110170","is_oa":true,"landing_page_url":"https://doi.org/10.3390/data8110170","pdf_url":"https://www.mdpi.com/2306-5729/8/11/170/pdf?version=1699619637","source":{"id":"https://openalex.org/S4210226510","display_name":"Data","issn_l":"2306-5729","issn":["2306-5729"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4388651588.pdf"},"referenced_works_count":16,"referenced_works":["https://openalex.org/W1561004407","https://openalex.org/W1808906688","https://openalex.org/W1979193217","https://openalex.org/W2019096529","https://openalex.org/W2067359214","https://openalex.org/W2082283091","https://openalex.org/W2088572966","https://openalex.org/W2408040052","https://openalex.org/W2462403954","https://openalex.org/W2506660467","https://openalex.org/W2805739644","https://openalex.org/W3210728678","https://openalex.org/W4231945469","https://openalex.org/W4281764986","https://openalex.org/W6633621408","https://openalex.org/W6676373471"],"related_works":["https://openalex.org/W3023169329","https://openalex.org/W4389470870","https://openalex.org/W2782165897","https://openalex.org/W4300438041","https://openalex.org/W2054104202","https://openalex.org/W2139703748","https://openalex.org/W2761254753","https://openalex.org/W1024825291","https://openalex.org/W2188981919","https://openalex.org/W3084909426"],"abstract_inverted_index":{"We":[0,46,115,126],"introduce":[1],"DeReKoGram,":[2],"a":[3,29,72,87],"novel":[4],"frequency":[5],"dataset":[6,24,50],"containing":[7],"lemma":[8],"and":[9,16,35,53,137],"part-of-speech":[10],"(POS)":[11],"information":[12,26],"for":[13],"1-,":[14],"2-,":[15],"3-grams":[17],"from":[18],"the":[19,49,57,60,75,92,99,113,119,124,132,146],"German":[20],"Reference":[21],"Corpus.":[22],"The":[23],"contains":[25],"based":[27,41],"on":[28,42,141],"corpus":[30,44],"of":[31,74,94,101,108,123,134],"43.2":[32],"billion":[33],"tokens":[34],"is":[36,67],"divided":[37],"into":[38],"16":[39,43,61],"parts":[40],"folds.":[45],"describe":[47],"how":[48,142],"was":[51],"created":[52],"structured.":[54],"By":[55],"evaluating":[56],"distribution":[58],"over":[59],"folds,":[62],"we":[63,90],"show":[64],"that":[65],"it":[66],"possible":[68],"to":[69,82,143],"work":[70,144],"with":[71,118,145],"subset":[73],"folds":[76,109],"in":[77,112,131],"many":[78],"use":[79],"cases":[80],"(e.g.,":[81],"save":[83],"computational":[84],"resources).":[85],"In":[86],"case":[88],"study,":[89],"investigate":[91],"growth":[93],"vocabulary":[95],"(as":[96],"well":[97],"as":[98,104],"number":[100,107],"hapax":[102],"legomena)":[103],"an":[105],"increasing":[106],"are":[110],"included":[111],"analysis.":[114],"cross-combine":[116],"this":[117],"various":[120],"cleaning":[121],"stages":[122],"dataset.":[125],"also":[127],"give":[128],"some":[129],"guidance":[130],"form":[133],"Python,":[135],"R,":[136],"Stata":[138],"markdown":[139],"scripts":[140],"resource.":[147]},"counts_by_year":[{"year":2024,"cited_by_count":2}],"updated_date":"2025-12-23T23:11:35.936235","created_date":"2025-10-10T00:00:00"}
