{"id":"https://openalex.org/W2955559629","doi":"https://doi.org/10.1145/3322905.3322914","title":"Improving OCR of historical newspapers and journals published in Finland","display_name":"Improving OCR of historical newspapers and journals published in Finland","publication_year":2019,"publication_date":"2019-05-08","ids":{"openalex":"https://openalex.org/W2955559629","doi":"https://doi.org/10.1145/3322905.3322914","mag":"2955559629"},"language":"en","primary_location":{"id":"doi:10.1145/3322905.3322914","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3322905.3322914","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 3rd International Conference on Digital Access to Textual Cultural Heritage","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://hdl.handle.net/10138/308417","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5028952369","display_name":"Senka Drobac","orcid":"https://orcid.org/0000-0002-7645-3079"},"institutions":[{"id":"https://openalex.org/I133731052","display_name":"University of Helsinki","ror":"https://ror.org/040af2s02","country_code":"FI","type":"education","lineage":["https://openalex.org/I133731052"]}],"countries":["FI"],"is_corresponding":true,"raw_author_name":"Senka Drobac","raw_affiliation_strings":["University of Helsinki"],"affiliations":[{"raw_affiliation_string":"University of Helsinki","institution_ids":["https://openalex.org/I133731052"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045510348","display_name":"Pekka Kauppinen","orcid":"https://orcid.org/0000-0003-2071-5110"},"institutions":[{"id":"https://openalex.org/I133731052","display_name":"University of Helsinki","ror":"https://ror.org/040af2s02","country_code":"FI","type":"education","lineage":["https://openalex.org/I133731052"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Pekka Kauppinen","raw_affiliation_strings":["University of Helsinki"],"affiliations":[{"raw_affiliation_string":"University of Helsinki","institution_ids":["https://openalex.org/I133731052"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001408607","display_name":"Krister Lind\u00e9n","orcid":"https://orcid.org/0000-0003-2337-303X"},"institutions":[{"id":"https://openalex.org/I133731052","display_name":"University of Helsinki","ror":"https://ror.org/040af2s02","country_code":"FI","type":"education","lineage":["https://openalex.org/I133731052"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Krister Lind\u00e9n","raw_affiliation_strings":["University of Helsinki"],"affiliations":[{"raw_affiliation_string":"University of Helsinki","institution_ids":["https://openalex.org/I133731052"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5028952369"],"corresponding_institution_ids":["https://openalex.org/I133731052"],"apc_list":null,"apc_paid":null,"fwci":0.4089,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.64967129,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"97","last_page":"102"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13523","display_name":"Mathematics, Computing, and Information Processing","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.98580002784729,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/newspaper","display_name":"Newspaper","score":0.8571324944496155},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7788597941398621},{"id":"https://openalex.org/keywords/optical-character-recognition","display_name":"Optical character recognition","score":0.7062353491783142},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.7026489973068237},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.6205785274505615},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6129374504089355},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5705565810203552},{"id":"https://openalex.org/keywords/test-set","display_name":"Test set","score":0.5498858690261841},{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.5307061076164246},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4639434814453125},{"id":"https://openalex.org/keywords/joint","display_name":"Joint (building)","score":0.4610888361930847},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.431777685880661},{"id":"https://openalex.org/keywords/character-recognition","display_name":"Character recognition","score":0.4124827980995178},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.35463714599609375},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09456932544708252},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.09053009748458862},{"id":"https://openalex.org/keywords/advertising","display_name":"Advertising","score":0.0802331268787384}],"concepts":[{"id":"https://openalex.org/C201280247","wikidata":"https://www.wikidata.org/wiki/Q11032","display_name":"Newspaper","level":2,"score":0.8571324944496155},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7788597941398621},{"id":"https://openalex.org/C546480517","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Optical character recognition","level":3,"score":0.7062353491783142},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.7026489973068237},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.6205785274505615},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6129374504089355},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5705565810203552},{"id":"https://openalex.org/C169903167","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Test set","level":2,"score":0.5498858690261841},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.5307061076164246},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4639434814453125},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.4610888361930847},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.431777685880661},{"id":"https://openalex.org/C2987247673","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Character recognition","level":3,"score":0.4124827980995178},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.35463714599609375},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09456932544708252},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.09053009748458862},{"id":"https://openalex.org/C112698675","wikidata":"https://www.wikidata.org/wiki/Q37038","display_name":"Advertising","level":1,"score":0.0802331268787384},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C170154142","wikidata":"https://www.wikidata.org/wiki/Q150737","display_name":"Architectural engineering","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3322905.3322914","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3322905.3322914","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 3rd International Conference on Digital Access to Textual Cultural Heritage","raw_type":"proceedings-article"},{"id":"pmh:oai:helda.helsinki.fi:10138/308417","is_oa":true,"landing_page_url":"http://hdl.handle.net/10138/308417","pdf_url":null,"source":{"id":"https://openalex.org/S4210213322","display_name":"Ty\u00f6v\u00e4entutkimus Vuosikirja","issn_l":"0784-1272","issn":["0784-1272","1459-7780"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference contribution"}],"best_oa_location":{"id":"pmh:oai:helda.helsinki.fi:10138/308417","is_oa":true,"landing_page_url":"http://hdl.handle.net/10138/308417","pdf_url":null,"source":{"id":"https://openalex.org/S4210213322","display_name":"Ty\u00f6v\u00e4entutkimus Vuosikirja","issn_l":"0784-1272","issn":["0784-1272","1459-7780"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference contribution"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.8299999833106995,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W1647671624","https://openalex.org/W1993557076","https://openalex.org/W2060580591","https://openalex.org/W2064726190","https://openalex.org/W2088483389","https://openalex.org/W2541388296","https://openalex.org/W2583190795","https://openalex.org/W2805770332","https://openalex.org/W2963414181","https://openalex.org/W2990589356"],"related_works":["https://openalex.org/W4312612713","https://openalex.org/W2042634493","https://openalex.org/W2728450881","https://openalex.org/W2100798736","https://openalex.org/W2481236784","https://openalex.org/W4383748232","https://openalex.org/W2085262576","https://openalex.org/W2786227708","https://openalex.org/W4388089185","https://openalex.org/W1990237101"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"experiments":[3],"on":[4,91,105],"Optical":[5],"character":[6],"recognition":[7],"(OCR)":[8],"of":[9],"historical":[10],"newspapers":[11],"and":[12,24,26,32,50,60],"journals":[13],"published":[14],"in":[15,29,70],"Finland.":[16],"The":[17],"corpus":[18],"has":[19],"two":[20],"main":[21],"languages:":[22],"Finnish":[23,93],"Swedish":[25],"is":[27,43,79],"written":[28],"both":[30,58],"Blackletter":[31],"Antiqua":[33],"fonts.":[34,62],"Here":[35],"we":[36,65,86],"experiment":[37],"with":[38,82,96],"how":[39],"much":[40],"training":[41],"data":[42,107],"enough":[44],"to":[45,52],"train":[46,53],"high":[47],"accuracy":[48],"models,":[49],"try":[51],"a":[54],"joint":[55],"model":[56,74,85],"for":[57,75],"languages":[59],"all":[61],"So":[63],"far":[64],"have":[66],"not":[67],"been":[68],"successful":[69],"getting":[71],"one":[72],"best":[73,89],"all,":[76],"but":[77],"it":[78],"promising":[80],"that":[81],"the":[83,88,92],"mixed":[84],"get":[87],"results":[90,104],"test":[94],"set":[95],"95":[97],"%":[98],"CAR,":[99],"which":[100],"clearly":[101],"surpasses":[102],"previous":[103],"this":[106],"set.":[108]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
