{"id":"https://openalex.org/W1570518225","doi":"https://doi.org/10.1007/978-3-642-30910-6_13","title":"The Construction of a 500-Million-Word Reference Corpus of Contemporary Written Dutch","display_name":"The Construction of a 500-Million-Word Reference Corpus of Contemporary Written Dutch","publication_year":2012,"publication_date":"2012-11-11","ids":{"openalex":"https://openalex.org/W1570518225","doi":"https://doi.org/10.1007/978-3-642-30910-6_13","mag":"1570518225"},"language":"en","primary_location":{"id":"doi:10.1007/978-3-642-30910-6_13","is_oa":true,"landing_page_url":"https://doi.org/10.1007/978-3-642-30910-6_13","pdf_url":"https://link.springer.com/content/pdf/10.1007/978-3-642-30910-6_13.pdf","source":{"id":"https://openalex.org/S4210206418","display_name":"Theory and applications of natural language processing","issn_l":"2192-032X","issn":["2192-032X","2192-0338"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319965","host_organization_name":"Springer Nature","host_organization_lineage":["https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Nature"],"type":"book series"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Theory and Applications of Natural Language Processing","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/978-3-642-30910-6_13.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113868482","display_name":"Nelleke Oostdijk","orcid":null},"institutions":[{"id":"https://openalex.org/I145872427","display_name":"Radboud University Nijmegen","ror":"https://ror.org/016xsfp80","country_code":"NL","type":"education","lineage":["https://openalex.org/I145872427"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Nelleke Oostdijk","raw_affiliation_strings":["Radboud University Nijmegen, Nijmegen, The Netherlands"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Radboud University Nijmegen, Nijmegen, The Netherlands","institution_ids":["https://openalex.org/I145872427"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006437012","display_name":"Martin Reynaert","orcid":null},"institutions":[{"id":"https://openalex.org/I193700539","display_name":"Tilburg University","ror":"https://ror.org/04b8v1s79","country_code":"NL","type":"education","lineage":["https://openalex.org/I193700539"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Martin Reynaert","raw_affiliation_strings":["Tilburg University, Tilburg, The Netherlands"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tilburg University, Tilburg, The Netherlands","institution_ids":["https://openalex.org/I193700539"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019867041","display_name":"V\u00e9ronique Hoste","orcid":"https://orcid.org/0000-0002-0539-4630"},"institutions":[{"id":"https://openalex.org/I12607205","display_name":"University College Ghent","ror":"https://ror.org/00rs45z86","country_code":"BE","type":"education","lineage":["https://openalex.org/I12607205"]},{"id":"https://openalex.org/I32597200","display_name":"Ghent University","ror":"https://ror.org/00cv9y106","country_code":"BE","type":"education","lineage":["https://openalex.org/I32597200"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"V\u00e9ronique Hoste","raw_affiliation_strings":["University College Ghent and Ghent University, Ghent, Belgium"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University College Ghent and Ghent University, Ghent, Belgium","institution_ids":["https://openalex.org/I12607205","https://openalex.org/I32597200"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5068809019","display_name":"Ineke Schuurman","orcid":null},"institutions":[{"id":"https://openalex.org/I99464096","display_name":"KU Leuven","ror":"https://ror.org/05f950310","country_code":"BE","type":"education","lineage":["https://openalex.org/I99464096"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Ineke Schuurman","raw_affiliation_strings":["KU Leuven, Leuven, Belgium"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"KU Leuven, Leuven, Belgium","institution_ids":["https://openalex.org/I99464096"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5113868482"],"corresponding_institution_ids":["https://openalex.org/I145872427"],"apc_list":null,"apc_paid":null,"fwci":6.6627,"has_fulltext":true,"cited_by_count":171,"citation_normalized_percentile":{"value":0.97255586,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"219","last_page":"247"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12353","display_name":"Lexicography and Language Studies","score":0.9878000020980835,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5472464561462402},{"id":"https://openalex.org/keywords/corpus-linguistics","display_name":"Corpus linguistics","score":0.5176066160202026},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.48818832635879517},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4662296772003174},{"id":"https://openalex.org/keywords/sonar","display_name":"Sonar","score":0.46017470955848694},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4099770188331604},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.382699579000473},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.3251025676727295},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.06194859743118286}],"concepts":[{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5472464561462402},{"id":"https://openalex.org/C532629269","wikidata":"https://www.wikidata.org/wiki/Q865083","display_name":"Corpus linguistics","level":2,"score":0.5176066160202026},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.48818832635879517},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4662296772003174},{"id":"https://openalex.org/C555745239","wikidata":"https://www.wikidata.org/wiki/Q133220","display_name":"Sonar","level":2,"score":0.46017470955848694},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4099770188331604},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.382699579000473},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.3251025676727295},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.06194859743118286},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1007/978-3-642-30910-6_13","is_oa":true,"landing_page_url":"https://doi.org/10.1007/978-3-642-30910-6_13","pdf_url":"https://link.springer.com/content/pdf/10.1007/978-3-642-30910-6_13.pdf","source":{"id":"https://openalex.org/S4210206418","display_name":"Theory and applications of natural language processing","issn_l":"2192-032X","issn":["2192-032X","2192-0338"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319965","host_organization_name":"Springer Nature","host_organization_lineage":["https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Nature"],"type":"book series"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Theory and Applications of Natural Language Processing","raw_type":"book-chapter"},{"id":"pmh:oai:tilburguniversity.edu:openaire_cris_publications/a0dbaf27-79f5-4a28-a499-e0b1c5e06cf0","is_oa":false,"landing_page_url":"https://research.tilburguniversity.edu/en/publications/a0dbaf27-79f5-4a28-a499-e0b1c5e06cf0","pdf_url":null,"source":{"id":"https://openalex.org/S4406923027","display_name":"Tilburg University Research Portal","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Oostdijk, N, Reynaert, M W C, Hoste, V & Schuurman, I 2012, The construction of a 500-million-word reference corpus of contemporary written Dutch. in P Spyns & J Odijk (eds), Essential Speech and language Technology for Dutch : Results by the STEVIN-programme. Springer Verlag, Heidelberg, pp. 201-226.","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:lirias2repo.kuleuven.be:123456789/400061","is_oa":true,"landing_page_url":"https://lirias.kuleuven.be/handle/123456789/400061","pdf_url":null,"source":{"id":"https://openalex.org/S4306401954","display_name":"Lirias (KU Leuven)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I99464096","host_organization_name":"KU Leuven","host_organization_lineage":["https://openalex.org/I99464096"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"Essential Speech and Language Technology for Dutch: resources, tools and applications, Chapt. 13, (219-247), (Theory and Applications of Natural Language Processing)","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:repository.ubn.ru.nl:2066/116265","is_oa":false,"landing_page_url":"http://hdl.handle.net/2066/116265","pdf_url":null,"source":{"id":"https://openalex.org/S4306401067","display_name":"Radboud Repository (Radboud University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I145872427","host_organization_name":"Radboud University Nijmegen","host_organization_lineage":["https://openalex.org/I145872427"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Spyns, P.; Odijk, J. (ed.), Essential Speech and Language Technology for Dutch, pp. 219-247","raw_type":"Part of book or chapter of book"},{"id":"pmh:oai:tilburguniversity.edu:publications/a0dbaf27-79f5-4a28-a499-e0b1c5e06cf0","is_oa":false,"landing_page_url":"https://pure.uvt.nl/portal/en/publications/the-construction-of-a-500millionword-reference-corpus-of-contemporary-written-dutch(a0dbaf27-79f5-4a28-a499-e0b1c5e06cf0).html","pdf_url":null,"source":{"id":"https://openalex.org/S4306401490","display_name":"Research portal (Tilburg University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I193700539","host_organization_name":"Tilburg University","host_organization_lineage":["https://openalex.org/I193700539"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Oostdijk, N, Reynaert, M W C, Hoste, V & Schuurman, I 2012, The construction of a 500-million-word reference corpus of contemporary written Dutch. in P Spyns & J Odijk (eds), Essential Speech and language Technology for Dutch : Results by the STEVIN-programme. Springer Verlag, Heidelberg, pp. 201-226.","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"doi:10.1007/978-3-642-30910-6_13","is_oa":true,"landing_page_url":"https://doi.org/10.1007/978-3-642-30910-6_13","pdf_url":"https://link.springer.com/content/pdf/10.1007/978-3-642-30910-6_13.pdf","source":{"id":"https://openalex.org/S4210206418","display_name":"Theory and applications of natural language processing","issn_l":"2192-032X","issn":["2192-032X","2192-0338"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319965","host_organization_name":"Springer Nature","host_organization_lineage":["https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Nature"],"type":"book series"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Theory and Applications of Natural Language Processing","raw_type":"book-chapter"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W1570518225.pdf","grobid_xml":"https://content.openalex.org/works/W1570518225.grobid-xml"},"referenced_works_count":54,"referenced_works":["https://openalex.org/W38436086","https://openalex.org/W52875691","https://openalex.org/W84750871","https://openalex.org/W148834818","https://openalex.org/W155979645","https://openalex.org/W168452485","https://openalex.org/W187699659","https://openalex.org/W200443455","https://openalex.org/W206368066","https://openalex.org/W301308173","https://openalex.org/W1511645536","https://openalex.org/W1514244798","https://openalex.org/W1524609903","https://openalex.org/W1552061706","https://openalex.org/W1552336971","https://openalex.org/W1560781570","https://openalex.org/W1577423537","https://openalex.org/W1586196985","https://openalex.org/W1625582487","https://openalex.org/W1699166917","https://openalex.org/W1920276121","https://openalex.org/W1954663096","https://openalex.org/W1965693266","https://openalex.org/W1972707925","https://openalex.org/W2035482918","https://openalex.org/W2049082952","https://openalex.org/W2053379943","https://openalex.org/W2056451646","https://openalex.org/W2113342254","https://openalex.org/W2113786274","https://openalex.org/W2115323600","https://openalex.org/W2116957256","https://openalex.org/W2119384275","https://openalex.org/W2127589659","https://openalex.org/W2144578941","https://openalex.org/W2153330999","https://openalex.org/W2153804780","https://openalex.org/W2155652813","https://openalex.org/W2158847908","https://openalex.org/W2168664213","https://openalex.org/W2171950509","https://openalex.org/W2251177966","https://openalex.org/W2407338347","https://openalex.org/W2550419310","https://openalex.org/W2579915115","https://openalex.org/W2598282550","https://openalex.org/W2607975287","https://openalex.org/W2729906263","https://openalex.org/W2915429162","https://openalex.org/W3038086829","https://openalex.org/W4233290938","https://openalex.org/W4243971525","https://openalex.org/W6634220394","https://openalex.org/W6826268582"],"related_works":["https://openalex.org/W3199220297","https://openalex.org/W2137769200","https://openalex.org/W2032962400","https://openalex.org/W2952608638","https://openalex.org/W2911323905","https://openalex.org/W2130145841","https://openalex.org/W1890543963","https://openalex.org/W2350761455","https://openalex.org/W1996172652","https://openalex.org/W2347430706"],"abstract_inverted_index":{"The":[0,41,61],"construction":[1,62],"of":[2,9,16,19,63],"a":[3,24,92],"large":[4],"and":[5,30,38,53,72,82,89,100],"richly":[6],"annotated":[7],"corpus":[8,58,65],"written":[10],"Dutch":[11,56],"was":[12,59,94],"identified":[13],"as":[14],"one":[15],"the":[17,20,55,64,76,80,83,87],"priorities":[18],"STEVIN":[21],"programme.":[22],"Such":[23],"corpus,":[25],"sampling":[26],"texts":[27],"from":[28],"conventional":[29],"new":[31],"media,":[32],"is":[33],"invaluable":[34],"for":[35],"scientific":[36],"research":[37],"application":[39],"development.":[40],"present":[42],"chapter":[43],"describes":[44],"how":[45],"in":[46,86],"two":[47],"consecutive":[48],"STEVIN-funded":[49],"projects,":[50,91],"viz.":[51],"D-Coi":[52,88],"SoNaR,":[54],"reference":[57],"developed.":[60],"has":[66],"been":[67],"guided":[68],"by":[69],"(inter)national":[70],"standards":[71],"best":[73],"practices.":[74],"At":[75],"same":[77],"time":[78],"through":[79],"achievements":[81],"experiences":[84],"gained":[85],"SoNaR":[90],"contribution":[93],"made":[95],"to":[96],"their":[97],"further":[98],"advancement":[99],"dissemination.":[101]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":36},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":14},{"year":2022,"cited_by_count":14},{"year":2021,"cited_by_count":13},{"year":2020,"cited_by_count":18},{"year":2019,"cited_by_count":11},{"year":2018,"cited_by_count":9},{"year":2017,"cited_by_count":15},{"year":2016,"cited_by_count":10},{"year":2015,"cited_by_count":4},{"year":2014,"cited_by_count":12},{"year":2013,"cited_by_count":3},{"year":2012,"cited_by_count":2}],"updated_date":"2026-06-09T15:46:55.921056","created_date":"2025-10-10T00:00:00"}
