{"id":"https://openalex.org/W2576037640","doi":"https://doi.org/10.3233/978-1-61499-701-6-122","title":"Between Diachrony and Synchrony: Evaluation of Lexical Quality of a Digitized Historical Finnish Newspaper and Journal Collection with Morphological Analyzers","display_name":"Between Diachrony and Synchrony: Evaluation of Lexical Quality of a Digitized Historical Finnish Newspaper and Journal Collection with Morphological Analyzers","publication_year":2016,"publication_date":"2016-01-01","ids":{"openalex":"https://openalex.org/W2576037640","doi":"https://doi.org/10.3233/978-1-61499-701-6-122","mag":"2576037640"},"language":"en","primary_location":{"id":"doi:10.3233/978-1-61499-701-6-122","is_oa":true,"landing_page_url":"https://doi.org/10.3233/978-1-61499-701-6-122","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.3233/978-1-61499-701-6-122","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5014360834","display_name":"Kimmo Kettunen","orcid":"https://orcid.org/0000-0003-2747-1382"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Kettunen Kimmo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5116395818","display_name":"P auml auml kk ouml nen Tuula","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"P&auml;&auml;kk&ouml;nen Tuula","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5050255661","display_name":"Mika Koistinen","orcid":"https://orcid.org/0000-0003-0471-314X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Koistinen Mika","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5014360834"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.3547,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.83170105,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12377","display_name":"Digital Humanities and Scholarship","score":0.9890999794006348,"subfield":{"id":"https://openalex.org/subfields/1208","display_name":"Literature and Literary Theory"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13523","display_name":"Mathematics, Computing, and Information Processing","score":0.9599000215530396,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/newspaper","display_name":"Newspaper","score":0.8725978136062622},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.47554895281791687},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.46441975235939026},{"id":"https://openalex.org/keywords/history","display_name":"History","score":0.4246058464050293},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.41834113001823425},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3774826228618622},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.33197343349456787},{"id":"https://openalex.org/keywords/media-studies","display_name":"Media studies","score":0.11317932605743408},{"id":"https://openalex.org/keywords/sociology","display_name":"Sociology","score":0.07856059074401855},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.047486305236816406}],"concepts":[{"id":"https://openalex.org/C201280247","wikidata":"https://www.wikidata.org/wiki/Q11032","display_name":"Newspaper","level":2,"score":0.8725978136062622},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.47554895281791687},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.46441975235939026},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.4246058464050293},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.41834113001823425},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3774826228618622},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.33197343349456787},{"id":"https://openalex.org/C29595303","wikidata":"https://www.wikidata.org/wiki/Q165650","display_name":"Media studies","level":1,"score":0.11317932605743408},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.07856059074401855},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.047486305236816406},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.3233/978-1-61499-701-6-122","is_oa":true,"landing_page_url":"https://doi.org/10.3233/978-1-61499-701-6-122","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"}],"best_oa_location":{"id":"doi:10.3233/978-1-61499-701-6-122","is_oa":true,"landing_page_url":"https://doi.org/10.3233/978-1-61499-701-6-122","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"sustainable_development_goals":[{"score":0.6200000047683716,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W1582863317","https://openalex.org/W1990871427","https://openalex.org/W2069172670","https://openalex.org/W2138238723","https://openalex.org/W2249004193","https://openalex.org/W2266997980","https://openalex.org/W2471854148","https://openalex.org/W2497337038","https://openalex.org/W2574966786"],"related_works":["https://openalex.org/W2376554757","https://openalex.org/W612150824","https://openalex.org/W2361959990","https://openalex.org/W2100945520","https://openalex.org/W1596512750","https://openalex.org/W2383443050","https://openalex.org/W2367702734","https://openalex.org/W644339423","https://openalex.org/W2386525189","https://openalex.org/W2360284199"],"abstract_inverted_index":{"The":[0,20,34,86,127],"National":[1,87],"Library":[2],"of":[3,22,49,53,62,78,107,129,152,165,189,193,200,208,213,222,240,250,266],"Finland":[4,14],"has":[5,81,112],"digitized":[6],"the":[7,23,50,79,94,108,163,190,194,223,229,237,241,267],"historical":[8],"newspapers":[9],"and":[10,17,45,59,74,116,145,203,215,244],"journals":[11],"published":[12],"in":[13,43,72,136,228],"between":[15],"1771":[16],"1910":[18,28],"[1,2].":[19],"size":[21],"whole":[24,109],"collection":[25,36,51,80],"up":[26,254],"to":[27,161,174,184,217,235,246],"is":[29,132,156,232],"about":[30,54,82],"3.1":[31],"M":[32],"pages.":[33,65],"newspaper":[35],"contains":[37],"approximately":[38],"1.961":[39],"million":[40],"pages":[41,71],"mostly":[42],"Finnish":[44,47,76,191],"Swedish.":[46],"part":[48,61,77,192],"consists":[52],"1":[55],"063":[56],"648":[57],"pages,":[58],"Swedish":[60],"892":[63],"101":[64],"Additionally":[66],"there":[67],"are":[68,91],"11":[69],"548":[70],"German":[73],"Russian.":[75],"2.407":[83],"billion":[84],"words.":[85],"Library's":[88],"Digital":[89],"Collections":[90],"offered":[92],"via":[93],"digi.kansalliskirjasto.fi":[95],"web":[96],"service,":[97],"also":[98],"known":[99],"as":[100,139,149],"Digi.":[101],"An":[102],"open":[103],"data":[104,221,243],"delivery":[105],"package":[106],"text":[110],"material":[111],"been":[113],"produced":[114],"recently":[115],"it":[117,140],"will":[118],"be":[119,172],"made":[120],"publicly":[121],"available":[122,159],"later":[123],"this":[124],"year":[125],"[3].":[126],"quality":[128,164,188,230,259],"OCRed":[130],"collections":[131,153],"an":[133],"important":[134],"topic":[135],"digital":[137],"humanities,":[138],"affects":[141],"general":[142],"usability,":[143],"searchability":[144],"advanced":[146],"processing,":[147],"such":[148],"content":[150],"mining,":[151],"[4,5].":[154],"There":[155],"no":[157],"single":[158],"method":[160],"assess":[162],"large":[166],"collections,":[167],"but":[168],"different":[169],"methods":[170,183,251],"can":[171],"used":[173],"approximate":[175,185],"quality.":[176],"This":[177],"paper":[178],"uses":[179],"corpus":[180],"analysis":[181,212,231],"style":[182],"overall":[186],"lexical":[187,220,242],"Digi":[195],"collection.":[196],"Methods":[197],"include":[198],"usage":[199,207],"parallel":[201],"samples":[202],"word":[204],"error":[205],"rates,":[206],"morphological":[209],"analyzers,":[210],"frequency":[211],"words":[214],"comparisons":[216],"comparable":[218],"edited":[219],"same":[224],"era.":[225],"Our":[226],"aim":[227],"twofold:":[233],"firstly":[234],"analyze":[236],"present":[238],"state":[239],"secondly,":[245],"establish":[247],"a":[248,255],"set":[249],"that":[252],"build":[253],"compact":[256],"procedure":[257],"for":[258],"assessment":[260],"after":[261],"e.g.":[262],"re-OCRing":[263],"or":[264],"post-correction":[265],"material.":[268]},"counts_by_year":[{"year":2021,"cited_by_count":2},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":3}],"updated_date":"2026-03-22T08:09:32.410652","created_date":"2025-10-10T00:00:00"}
