{"id":"https://openalex.org/W2981788981","doi":"https://doi.org/10.1145/3322905.3322911","title":"Detecting Articles in a Digitized Finnish Historical Newspaper Collection 1771-1929","display_name":"Detecting Articles in a Digitized Finnish Historical Newspaper Collection 1771-1929","publication_year":2019,"publication_date":"2019-05-08","ids":{"openalex":"https://openalex.org/W2981788981","doi":"https://doi.org/10.1145/3322905.3322911","mag":"2981788981"},"language":"en","primary_location":{"id":"doi:10.1145/3322905.3322911","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3322905.3322911","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 3rd International Conference on Digital Access to Textual Cultural Heritage","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://hal.science/hal-02111142","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5014360834","display_name":"Kimmo Kettunen","orcid":"https://orcid.org/0000-0003-2747-1382"},"institutions":[{"id":"https://openalex.org/I133731052","display_name":"University of Helsinki","ror":"https://ror.org/040af2s02","country_code":"FI","type":"education","lineage":["https://openalex.org/I133731052"]}],"countries":["FI"],"is_corresponding":true,"raw_author_name":"Kimmo Kettunen","raw_affiliation_strings":["The National Library of Finland DH Research, University of Helsinki, Mikkeli, Finland"],"affiliations":[{"raw_affiliation_string":"The National Library of Finland DH Research, University of Helsinki, Mikkeli, Finland","institution_ids":["https://openalex.org/I133731052"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063485128","display_name":"Teemu Ruokolainen","orcid":"https://orcid.org/0000-0001-7454-5300"},"institutions":[{"id":"https://openalex.org/I133731052","display_name":"University of Helsinki","ror":"https://ror.org/040af2s02","country_code":"FI","type":"education","lineage":["https://openalex.org/I133731052"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Teemu Ruokolainen","raw_affiliation_strings":["The National Library of Finland DH Research, University of Helsinki, Mikkeli, Finland"],"affiliations":[{"raw_affiliation_string":"The National Library of Finland DH Research, University of Helsinki, Mikkeli, Finland","institution_ids":["https://openalex.org/I133731052"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049323346","display_name":"Erno Liukkonen","orcid":null},"institutions":[{"id":"https://openalex.org/I133731052","display_name":"University of Helsinki","ror":"https://ror.org/040af2s02","country_code":"FI","type":"education","lineage":["https://openalex.org/I133731052"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Erno Liukkonen","raw_affiliation_strings":["The National Library of Finland DH Research, University of Helsinki, Mikkeli, Finland"],"affiliations":[{"raw_affiliation_string":"The National Library of Finland DH Research, University of Helsinki, Mikkeli, Finland","institution_ids":["https://openalex.org/I133731052"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112340423","display_name":"Pierrick Tranouez","orcid":"https://orcid.org/0000-0002-1962-0782"},"institutions":[{"id":"https://openalex.org/I62396329","display_name":"Universit\u00e9 de Rouen Normandie","ror":"https://ror.org/03nhjew95","country_code":"FR","type":"education","lineage":["https://openalex.org/I4210105918","https://openalex.org/I62396329"]},{"id":"https://openalex.org/I4210108118","display_name":"Laboratoire d'Informatique, du Traitement de l'Information et des Syst\u00e8mes","ror":"https://ror.org/01f1vfy95","country_code":"FR","type":"facility","lineage":["https://openalex.org/I141576021","https://openalex.org/I4210105918","https://openalex.org/I4210105918","https://openalex.org/I4210105918","https://openalex.org/I4210108118","https://openalex.org/I62396329","https://openalex.org/I88814501"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Pierrick Tranouez","raw_affiliation_strings":["LITIS laboratory, University of Rouen Normandy, France","Equipe Apprentissage"],"affiliations":[{"raw_affiliation_string":"LITIS laboratory, University of Rouen Normandy, France","institution_ids":["https://openalex.org/I4210108118","https://openalex.org/I62396329"]},{"raw_affiliation_string":"Equipe Apprentissage","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018778676","display_name":"Daniel Antelme","orcid":null},"institutions":[{"id":"https://openalex.org/I62396329","display_name":"Universit\u00e9 de Rouen Normandie","ror":"https://ror.org/03nhjew95","country_code":"FR","type":"education","lineage":["https://openalex.org/I4210105918","https://openalex.org/I62396329"]},{"id":"https://openalex.org/I4210108118","display_name":"Laboratoire d'Informatique, du Traitement de l'Information et des Syst\u00e8mes","ror":"https://ror.org/01f1vfy95","country_code":"FR","type":"facility","lineage":["https://openalex.org/I141576021","https://openalex.org/I4210105918","https://openalex.org/I4210105918","https://openalex.org/I4210105918","https://openalex.org/I4210108118","https://openalex.org/I62396329","https://openalex.org/I88814501"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Daniel Antelme","raw_affiliation_strings":["LITIS laboratory, University of Rouen Normandy, France","Equipe Apprentissage"],"affiliations":[{"raw_affiliation_string":"LITIS laboratory, University of Rouen Normandy, France","institution_ids":["https://openalex.org/I4210108118","https://openalex.org/I62396329"]},{"raw_affiliation_string":"Equipe Apprentissage","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5022490769","display_name":"Thierry Paquet","orcid":"https://orcid.org/0000-0002-2044-7542"},"institutions":[{"id":"https://openalex.org/I62396329","display_name":"Universit\u00e9 de Rouen Normandie","ror":"https://ror.org/03nhjew95","country_code":"FR","type":"education","lineage":["https://openalex.org/I4210105918","https://openalex.org/I62396329"]},{"id":"https://openalex.org/I4210108118","display_name":"Laboratoire d'Informatique, du Traitement de l'Information et des Syst\u00e8mes","ror":"https://ror.org/01f1vfy95","country_code":"FR","type":"facility","lineage":["https://openalex.org/I141576021","https://openalex.org/I4210105918","https://openalex.org/I4210105918","https://openalex.org/I4210105918","https://openalex.org/I4210108118","https://openalex.org/I62396329","https://openalex.org/I88814501"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Thierry Paquet","raw_affiliation_strings":["LITIS laboratory, University of Rouen Normandy, France","Equipe Apprentissage"],"affiliations":[{"raw_affiliation_string":"LITIS laboratory, University of Rouen Normandy, France","institution_ids":["https://openalex.org/I4210108118","https://openalex.org/I62396329"]},{"raw_affiliation_string":"Equipe Apprentissage","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5014360834"],"corresponding_institution_ids":["https://openalex.org/I133731052"],"apc_list":null,"apc_paid":null,"fwci":0.2043,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.55269007,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"59","last_page":"64"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9944999814033508,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/newspaper","display_name":"Newspaper","score":0.9616798162460327},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6087396740913391},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.5764697790145874},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.41051971912384033},{"id":"https://openalex.org/keywords/data-extraction","display_name":"Data extraction","score":0.41005730628967285},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.3720167279243469},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.33474212884902954},{"id":"https://openalex.org/keywords/political-science","display_name":"Political science","score":0.10905331373214722},{"id":"https://openalex.org/keywords/media-studies","display_name":"Media studies","score":0.10286417603492737},{"id":"https://openalex.org/keywords/sociology","display_name":"Sociology","score":0.07774469256401062}],"concepts":[{"id":"https://openalex.org/C201280247","wikidata":"https://www.wikidata.org/wiki/Q11032","display_name":"Newspaper","level":2,"score":0.9616798162460327},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6087396740913391},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.5764697790145874},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.41051971912384033},{"id":"https://openalex.org/C2777466982","wikidata":"https://www.wikidata.org/wiki/Q5227287","display_name":"Data extraction","level":3,"score":0.41005730628967285},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3720167279243469},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.33474212884902954},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.10905331373214722},{"id":"https://openalex.org/C29595303","wikidata":"https://www.wikidata.org/wiki/Q165650","display_name":"Media studies","level":1,"score":0.10286417603492737},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.07774469256401062},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C2779473830","wikidata":"https://www.wikidata.org/wiki/Q1540899","display_name":"MEDLINE","level":2,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3322905.3322911","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3322905.3322911","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 3rd International Conference on Digital Access to Textual Cultural Heritage","raw_type":"proceedings-article"},{"id":"pmh:oai:HAL:hal-02111142v1","is_oa":true,"landing_page_url":"https://hal.science/hal-02111142","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"DATech 2019, May 2019, Bruxelles, Belgium. &#x27E8;10.1145/3322905.3322911&#x27E9;","raw_type":"Conference papers"},{"id":"pmh:oai:helda.helsinki.fi:10138/312739","is_oa":true,"landing_page_url":"http://hdl.handle.net/10138/312739","pdf_url":null,"source":{"id":"https://openalex.org/S4210213322","display_name":"Ty\u00f6v\u00e4entutkimus Vuosikirja","issn_l":"0784-1272","issn":["0784-1272","1459-7780"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference contribution"}],"best_oa_location":{"id":"pmh:oai:HAL:hal-02111142v1","is_oa":true,"landing_page_url":"https://hal.science/hal-02111142","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"DATech 2019, May 2019, Bruxelles, Belgium. &#x27E8;10.1145/3322905.3322911&#x27E9;","raw_type":"Conference papers"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.4699999988079071,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G3005190096","display_name":null,"funder_award_id":"EU 2014-2020","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"},{"id":"https://openalex.org/G453067482","display_name":null,"funder_award_id":"014-2020","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"},{"id":"https://openalex.org/G5634946813","display_name":null,"funder_award_id":"2014-2020","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"}],"funders":[{"id":"https://openalex.org/F4320335322","display_name":"European Regional Development Fund","ror":"https://ror.org/00k4n6c32"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W10575494","https://openalex.org/W45609739","https://openalex.org/W313766896","https://openalex.org/W1998729616","https://openalex.org/W2001803719","https://openalex.org/W2005173844","https://openalex.org/W2013848960","https://openalex.org/W2055377242","https://openalex.org/W2057669757","https://openalex.org/W2091266855","https://openalex.org/W2092881757","https://openalex.org/W2100256130","https://openalex.org/W2113169315","https://openalex.org/W2124410444","https://openalex.org/W2154032543","https://openalex.org/W2533973874","https://openalex.org/W2777421064","https://openalex.org/W2915774443","https://openalex.org/W4377837580","https://openalex.org/W6601880168","https://openalex.org/W6611000133"],"related_works":["https://openalex.org/W2376554757","https://openalex.org/W612150824","https://openalex.org/W2361959990","https://openalex.org/W2100945520","https://openalex.org/W1596512750","https://openalex.org/W2383443050","https://openalex.org/W2367702734","https://openalex.org/W644339423","https://openalex.org/W2386525189","https://openalex.org/W4225580798"],"abstract_inverted_index":{"This":[0],"paper":[1],"describes":[2],"first":[3],"large":[4],"scale":[5],"article":[6,52,80],"detection":[7,53],"and":[8,54,83],"extraction":[9],"efforts":[10,88],"on":[11,43,89],"the":[12,18,39,66,90,98],"Finnish":[13],"Digi1":[14],"newspaper":[15,35],"material":[16,58],"of":[17,21,26,38,51,77,101,103],"National":[19],"Library":[20],"Finland":[22],"(NLF)":[23],"using":[24],"data":[25],"one":[27],"newspaper,":[28],"Uusi":[29],"Suometar":[30],"1869-1898.":[31],"The":[32,47],"historical":[33],"digital":[34],"archive":[36],"environment":[37],"NLF":[40],"is":[41,49],"based":[42,94],"commercial":[44],"docWorks2":[45],"software.":[46],"software":[48],"capable":[50],"extraction,":[55],"but":[56],"our":[57,87],"does":[59],"not":[60],"seem":[61],"to":[62],"behave":[63],"well":[64],"in":[65,68,75],"system":[67,82],"this":[69],"respect.":[70],"Therefore,":[71],"we":[72],"have":[73,84],"been":[74],"search":[76],"an":[78],"alternative":[79],"segmentation":[81],"now":[85],"focused":[86],"PIVAJ":[91],"machine":[92],"learning":[93],"platform":[95],"developed":[96],"at":[97],"LITIS":[99],"laboratory":[100],"University":[102],"Rouen":[104],"Normandy":[105],"[11--13,":[106],"16,":[107],"17].":[108]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2023,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
