{"id":"https://openalex.org/W3093265417","doi":"https://doi.org/10.3390/jimaging6100109","title":"One Step Is Not Enough: A Multi-Step Procedure for Building the Training Set of a Query by String Keyword Spotting System to Assist the Transcription of Historical Document","display_name":"One Step Is Not Enough: A Multi-Step Procedure for Building the Training Set of a Query by String Keyword Spotting System to Assist the Transcription of Historical Document","publication_year":2020,"publication_date":"2020-10-13","ids":{"openalex":"https://openalex.org/W3093265417","doi":"https://doi.org/10.3390/jimaging6100109","mag":"3093265417","pmid":"https://pubmed.ncbi.nlm.nih.gov/34460550"},"language":"en","primary_location":{"id":"doi:10.3390/jimaging6100109","is_oa":true,"landing_page_url":"https://doi.org/10.3390/jimaging6100109","pdf_url":"https://www.mdpi.com/2313-433X/6/10/109/pdf?version=1603189736","source":{"id":"https://openalex.org/S2736465063","display_name":"Journal of Imaging","issn_l":"2313-433X","issn":["2313-433X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Imaging","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2313-433X/6/10/109/pdf?version=1603189736","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048376385","display_name":"Antonio Parziale","orcid":"https://orcid.org/0000-0003-2911-9737"},"institutions":[{"id":"https://openalex.org/I131729948","display_name":"University of Salerno","ror":"https://ror.org/0192m2k53","country_code":"IT","type":"education","lineage":["https://openalex.org/I131729948"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Antonio Parziale","raw_affiliation_strings":["Department of Information and Electrical Engineering and Applied Mathematics, University of Salerno, Via Giovanni Paolo II, 132, 84084 Fisciano (SA), Italy"],"affiliations":[{"raw_affiliation_string":"Department of Information and Electrical Engineering and Applied Mathematics, University of Salerno, Via Giovanni Paolo II, 132, 84084 Fisciano (SA), Italy","institution_ids":["https://openalex.org/I131729948"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040481426","display_name":"Giuliana Capriolo","orcid":"https://orcid.org/0000-0003-4312-0814"},"institutions":[{"id":"https://openalex.org/I131729948","display_name":"University of Salerno","ror":"https://ror.org/0192m2k53","country_code":"IT","type":"education","lineage":["https://openalex.org/I131729948"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Giuliana Capriolo","raw_affiliation_strings":["Department of Cultural Heritage, University of Salerno, Via Giovanni Paolo II, 132, 84084 Fisciano (SA), Italy"],"affiliations":[{"raw_affiliation_string":"Department of Cultural Heritage, University of Salerno, Via Giovanni Paolo II, 132, 84084 Fisciano (SA), Italy","institution_ids":["https://openalex.org/I131729948"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5048265453","display_name":"Angelo Marcelli","orcid":"https://orcid.org/0000-0002-2019-2826"},"institutions":[{"id":"https://openalex.org/I131729948","display_name":"University of Salerno","ror":"https://ror.org/0192m2k53","country_code":"IT","type":"education","lineage":["https://openalex.org/I131729948"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Angelo Marcelli","raw_affiliation_strings":["Department of Information and Electrical Engineering and Applied Mathematics, University of Salerno, Via Giovanni Paolo II, 132, 84084 Fisciano (SA), Italy"],"affiliations":[{"raw_affiliation_string":"Department of Information and Electrical Engineering and Applied Mathematics, University of Salerno, Via Giovanni Paolo II, 132, 84084 Fisciano (SA), Italy","institution_ids":["https://openalex.org/I131729948"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5048376385"],"corresponding_institution_ids":["https://openalex.org/I131729948"],"apc_list":{"value":1600,"currency":"CHF","value_usd":1732},"apc_paid":{"value":1600,"currency":"CHF","value_usd":1732},"fwci":0.3925,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.62754712,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"6","issue":"10","first_page":"109","last_page":"109"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8455958962440491},{"id":"https://openalex.org/keywords/spotting","display_name":"Spotting","score":0.8268590569496155},{"id":"https://openalex.org/keywords/keyword-spotting","display_name":"Keyword spotting","score":0.7366358041763306},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.6084532141685486},{"id":"https://openalex.org/keywords/transcription","display_name":"Transcription (linguistics)","score":0.5769850015640259},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5557702779769897},{"id":"https://openalex.org/keywords/precision-and-recall","display_name":"Precision and recall","score":0.41947704553604126},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4181232452392578},{"id":"https://openalex.org/keywords/string","display_name":"String (physics)","score":0.41105201840400696},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3128218650817871}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8455958962440491},{"id":"https://openalex.org/C2779506182","wikidata":"https://www.wikidata.org/wiki/Q7580141","display_name":"Spotting","level":2,"score":0.8268590569496155},{"id":"https://openalex.org/C2781213101","wikidata":"https://www.wikidata.org/wiki/Q6398558","display_name":"Keyword spotting","level":2,"score":0.7366358041763306},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.6084532141685486},{"id":"https://openalex.org/C179926584","wikidata":"https://www.wikidata.org/wiki/Q207714","display_name":"Transcription (linguistics)","level":2,"score":0.5769850015640259},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5557702779769897},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.41947704553604126},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4181232452392578},{"id":"https://openalex.org/C157486923","wikidata":"https://www.wikidata.org/wiki/Q1376436","display_name":"String (physics)","level":2,"score":0.41105201840400696},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3128218650817871},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.3390/jimaging6100109","is_oa":true,"landing_page_url":"https://doi.org/10.3390/jimaging6100109","pdf_url":"https://www.mdpi.com/2313-433X/6/10/109/pdf?version=1603189736","source":{"id":"https://openalex.org/S2736465063","display_name":"Journal of Imaging","issn_l":"2313-433X","issn":["2313-433X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Imaging","raw_type":"journal-article"},{"id":"pmid:34460550","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/34460550","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of imaging","raw_type":null},{"id":"pmh:oai:doaj.org/article:2af61cf1065e434cac8c8777757f7437","is_oa":true,"landing_page_url":"https://doaj.org/article/2af61cf1065e434cac8c8777757f7437","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of Imaging, Vol 6, Iss 10, p 109 (2020)","raw_type":"article"},{"id":"pmh:oai:pubmedcentral.nih.gov:8321172","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/8321172","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"J Imaging","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.3390/jimaging6100109","is_oa":true,"landing_page_url":"https://doi.org/10.3390/jimaging6100109","pdf_url":"https://www.mdpi.com/2313-433X/6/10/109/pdf?version=1603189736","source":{"id":"https://openalex.org/S2736465063","display_name":"Journal of Imaging","issn_l":"2313-433X","issn":["2313-433X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Imaging","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.699999988079071,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3093265417.pdf","grobid_xml":"https://content.openalex.org/works/W3093265417.grobid-xml"},"referenced_works_count":50,"referenced_works":["https://openalex.org/W657471641","https://openalex.org/W1862875023","https://openalex.org/W1932188282","https://openalex.org/W1989737761","https://openalex.org/W2005267105","https://openalex.org/W2013962506","https://openalex.org/W2024272541","https://openalex.org/W2031868750","https://openalex.org/W2039468858","https://openalex.org/W2045706564","https://openalex.org/W2060746081","https://openalex.org/W2062889977","https://openalex.org/W2072058045","https://openalex.org/W2097663110","https://openalex.org/W2100478624","https://openalex.org/W2106986062","https://openalex.org/W2108596040","https://openalex.org/W2115105643","https://openalex.org/W2122585011","https://openalex.org/W2133059825","https://openalex.org/W2141356776","https://openalex.org/W2147393756","https://openalex.org/W2155673584","https://openalex.org/W2160847757","https://openalex.org/W2167929403","https://openalex.org/W2319343974","https://openalex.org/W2424734786","https://openalex.org/W2578977109","https://openalex.org/W2594443378","https://openalex.org/W2612965162","https://openalex.org/W2753573165","https://openalex.org/W2792302727","https://openalex.org/W2794752813","https://openalex.org/W2799823404","https://openalex.org/W2810648379","https://openalex.org/W2900564790","https://openalex.org/W2906357351","https://openalex.org/W2954379807","https://openalex.org/W2963880074","https://openalex.org/W2963924711","https://openalex.org/W2973710314","https://openalex.org/W2983963578","https://openalex.org/W3000629728","https://openalex.org/W3004116480","https://openalex.org/W3009816030","https://openalex.org/W3011228495","https://openalex.org/W3046334328","https://openalex.org/W3049090482","https://openalex.org/W3080702232","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W2918559346","https://openalex.org/W3119978414","https://openalex.org/W2114097550","https://openalex.org/W2516975559","https://openalex.org/W2545741539","https://openalex.org/W3206647229","https://openalex.org/W4286904253","https://openalex.org/W2000885660","https://openalex.org/W1969408022","https://openalex.org/W2117995638"],"abstract_inverted_index":{"Digital":[0],"libraries":[1],"offer":[2],"access":[3],"to":[4,57,151,171],"a":[5,35,66,71,81,85,93,103,134,140,153,168],"large":[6],"number":[7],"of":[8,46,50,60,108,128,137,173],"handwritten":[9],"historical":[10],"documents.":[11],"These":[12],"documents":[13],"are":[14],"available":[15],"as":[16],"raw":[17],"images":[18],"and":[19,32,75],"therefore":[20],"their":[21],"content":[22],"is":[23,30,39,54],"not":[24,42],"searchable.":[25],"A":[26],"fully":[27,36,94,130],"manual":[28,95,131],"transcription":[29,38,52],"time-consuming":[31],"expensive":[33],"while":[34],"automatic":[37,51],"cheaper":[40],"but":[41],"comparable":[43],"in":[44,84,167],"terms":[45],"accuracy.":[47],"The":[48,97,116],"performance":[49],"systems":[53],"strictly":[55],"related":[56],"the":[58,61,89,113,120,124,129,147,158,164,174,179],"composition":[59],"training":[62,82,143],"set.":[63,182],"We":[64],"propose":[65],"multi-step":[67,98,125,165],"procedure":[68,99,126,132,166],"that":[69,118,145],"exploits":[70],"Keyword":[72],"Spotting":[73],"system":[74,150],"human":[76],"validation":[77],"for":[78,177],"building":[79],"up":[80,107],"set":[83,105,122,144],"time":[86,135,169,175],"shorter":[87],"than":[88,157],"one":[90],"required":[91,176],"by":[92],"procedure.":[96],"was":[100,161],"tested":[101],"on":[102],"data":[104,121,181],"made":[106],"50":[109],"pages":[110],"extracted":[111],"from":[112],"Bentham":[114],"collection.":[115],"palaeographer":[117],"transcribed":[119],"with":[123,163],"instead":[127],"had":[133],"gain":[136],"52.54%.":[138],"Moreover,":[139],"small":[141],"size":[142],"allowed":[146],"keyword":[148],"spotting":[149],"show":[152],"precision":[154],"value":[155,160],"greater":[156],"recall":[159],"built":[162],"equal":[170],"35.25%":[172],"annotating":[178],"whole":[180]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
