{"id":"https://openalex.org/W2099262596","doi":"https://doi.org/10.1145/1568296.1568306","title":"A comprehensive evaluation methodology for noisy historical document recognition techniques","display_name":"A comprehensive evaluation methodology for noisy historical document recognition techniques","publication_year":2009,"publication_date":"2009-07-23","ids":{"openalex":"https://openalex.org/W2099262596","doi":"https://doi.org/10.1145/1568296.1568306","mag":"2099262596"},"language":"en","primary_location":{"id":"doi:10.1145/1568296.1568306","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1568296.1568306","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of The Third Workshop on Analytics for Noisy Unstructured Text Data","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031191307","display_name":"Nikolaos Stamatopoulos","orcid":"https://orcid.org/0000-0001-8785-5156"},"institutions":[{"id":"https://openalex.org/I203474044","display_name":"National Centre of Scientific Research \"Demokritos\"","ror":"https://ror.org/038jp4m40","country_code":"GR","type":"facility","lineage":["https://openalex.org/I203474044"]},{"id":"https://openalex.org/I4387152169","display_name":"Institute of Informatics & Telecommunications","ror":"https://ror.org/0396t6k89","country_code":null,"type":"facility","lineage":["https://openalex.org/I203474044","https://openalex.org/I4387152169"]}],"countries":["GR"],"is_corresponding":true,"raw_author_name":"Nikolaos Stamatopoulos","raw_affiliation_strings":["Institute of Informatics and Telecommunications, NCSR \"Demokritos\", Agia Paraskevi, Athens, Greece"],"affiliations":[{"raw_affiliation_string":"Institute of Informatics and Telecommunications, NCSR \"Demokritos\", Agia Paraskevi, Athens, Greece","institution_ids":["https://openalex.org/I203474044","https://openalex.org/I4387152169"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057141895","display_name":"Georgios Louloudis","orcid":"https://orcid.org/0000-0003-4127-3796"},"institutions":[{"id":"https://openalex.org/I200777214","display_name":"National and Kapodistrian University of Athens","ror":"https://ror.org/04gnjpq42","country_code":"GR","type":"education","lineage":["https://openalex.org/I200777214"]},{"id":"https://openalex.org/I203474044","display_name":"National Centre of Scientific Research \"Demokritos\"","ror":"https://ror.org/038jp4m40","country_code":"GR","type":"facility","lineage":["https://openalex.org/I203474044"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Georgios Louloudis","raw_affiliation_strings":["Institute of Informatics and Telecommunications, NCSR \"Demokritos\", Agia Paraskevi, Athens, Greece and University of Athens, Greece","Institute of Informatics and Telecommunications, NCSR Demokritos, Agia Paraskevi, Athens, Greece and University of Athens, Greece#TAB#"],"affiliations":[{"raw_affiliation_string":"Institute of Informatics and Telecommunications, NCSR \"Demokritos\", Agia Paraskevi, Athens, Greece and University of Athens, Greece","institution_ids":["https://openalex.org/I203474044","https://openalex.org/I200777214"]},{"raw_affiliation_string":"Institute of Informatics and Telecommunications, NCSR Demokritos, Agia Paraskevi, Athens, Greece and University of Athens, Greece#TAB#","institution_ids":["https://openalex.org/I200777214"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5006291450","display_name":"Basilis Gatos","orcid":"https://orcid.org/0000-0001-9873-0826"},"institutions":[{"id":"https://openalex.org/I4387152169","display_name":"Institute of Informatics & Telecommunications","ror":"https://ror.org/0396t6k89","country_code":null,"type":"facility","lineage":["https://openalex.org/I203474044","https://openalex.org/I4387152169"]},{"id":"https://openalex.org/I203474044","display_name":"National Centre of Scientific Research \"Demokritos\"","ror":"https://ror.org/038jp4m40","country_code":"GR","type":"facility","lineage":["https://openalex.org/I203474044"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Basilis Gatos","raw_affiliation_strings":["Institute of Informatics and Telecommunications, NCSR \"Demokritos\", Agia Paraskevi, Athens, Greece"],"affiliations":[{"raw_affiliation_string":"Institute of Informatics and Telecommunications, NCSR \"Demokritos\", Agia Paraskevi, Athens, Greece","institution_ids":["https://openalex.org/I203474044","https://openalex.org/I4387152169"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5031191307"],"corresponding_institution_ids":["https://openalex.org/I203474044","https://openalex.org/I4387152169"],"apc_list":null,"apc_paid":null,"fwci":0.6561,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.71706879,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"47","last_page":"54"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9883999824523926,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/ground-truth","display_name":"Ground truth","score":0.86997389793396},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8130524754524231},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.7818343639373779},{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.6774808764457703},{"id":"https://openalex.org/keywords/text-segmentation","display_name":"Text segmentation","score":0.6676508188247681},{"id":"https://openalex.org/keywords/transcription","display_name":"Transcription (linguistics)","score":0.6332868933677673},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6262393593788147},{"id":"https://openalex.org/keywords/historical-document","display_name":"Historical document","score":0.5868271589279175},{"id":"https://openalex.org/keywords/line","display_name":"Line (geometry)","score":0.5813562273979187},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5798776745796204},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5662457346916199},{"id":"https://openalex.org/keywords/optical-character-recognition","display_name":"Optical character recognition","score":0.5616452097892761},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.4631575644016266},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.44494912028312683},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.40948250889778137},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.32556265592575073},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.32271838188171387},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.2694064974784851},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.17933860421180725},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.09592720866203308},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.07757642865180969}],"concepts":[{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.86997389793396},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8130524754524231},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.7818343639373779},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.6774808764457703},{"id":"https://openalex.org/C98501671","wikidata":"https://www.wikidata.org/wiki/Q1948408","display_name":"Text segmentation","level":3,"score":0.6676508188247681},{"id":"https://openalex.org/C179926584","wikidata":"https://www.wikidata.org/wiki/Q207714","display_name":"Transcription (linguistics)","level":2,"score":0.6332868933677673},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6262393593788147},{"id":"https://openalex.org/C2778371909","wikidata":"https://www.wikidata.org/wiki/Q3771738","display_name":"Historical document","level":2,"score":0.5868271589279175},{"id":"https://openalex.org/C198352243","wikidata":"https://www.wikidata.org/wiki/Q37105","display_name":"Line (geometry)","level":2,"score":0.5813562273979187},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5798776745796204},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5662457346916199},{"id":"https://openalex.org/C546480517","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Optical character recognition","level":3,"score":0.5616452097892761},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.4631575644016266},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.44494912028312683},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.40948250889778137},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.32556265592575073},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.32271838188171387},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2694064974784851},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.17933860421180725},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.09592720866203308},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.07757642865180969},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/1568296.1568306","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1568296.1568306","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of The Third Workshop on Analytics for Noisy Unstructured Text Data","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.158.8885","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.158.8885","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.iit.demokritos.gr/~bgat/p47-stamatopoulos.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.711.8321","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.711.8321","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://users.iit.demokritos.gr/%7Enstam/page_files/AND_2009_Stamatopoulos.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G404730546","display_name":null,"funder_award_id":"215064","funder_id":"https://openalex.org/F4320334960","funder_display_name":"Seventh Framework Programme"}],"funders":[{"id":"https://openalex.org/F4320334960","display_name":"Seventh Framework Programme","ror":"https://ror.org/00k4n6c32"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W1968568012","https://openalex.org/W2016121186","https://openalex.org/W2019025183","https://openalex.org/W2024077020","https://openalex.org/W2052918223","https://openalex.org/W2063476404","https://openalex.org/W2068113013","https://openalex.org/W2084980974","https://openalex.org/W2107645346","https://openalex.org/W2127319406","https://openalex.org/W2128318879","https://openalex.org/W2131132193","https://openalex.org/W2133308113","https://openalex.org/W2134097102","https://openalex.org/W2142404595","https://openalex.org/W2154045346","https://openalex.org/W2154495259","https://openalex.org/W2154646934","https://openalex.org/W2362808110","https://openalex.org/W4238410424","https://openalex.org/W4243975084","https://openalex.org/W4299296451"],"related_works":["https://openalex.org/W2088281698","https://openalex.org/W2784287639","https://openalex.org/W3023805750","https://openalex.org/W4294702218","https://openalex.org/W2099262596","https://openalex.org/W2021525266","https://openalex.org/W2119179626","https://openalex.org/W2038329042","https://openalex.org/W4308094881","https://openalex.org/W2131730163"],"abstract_inverted_index":{"In":[0],"this":[1,46],"paper,":[2],"we":[3,48],"propose":[4],"a":[5,74,173,179],"new":[6],"comprehensive":[7],"methodology":[8,70],"in":[9,77,104],"order":[10,78,105],"to":[11,23,79,106,178],"evaluate":[12,24],"the":[13,27,34,51,62,65,81,94,108,111,119,128,146,149,155],"performance":[14,109],"of":[15,38,64,72,93,100,110,118,151],"noisy":[16,29],"historical":[17,66,180],"document":[18,96],"recognition":[19,30],"techniques.":[20],"We":[21],"aim":[22],"not":[25],"only":[26],"final":[28,112],"result":[31,114],"but":[32],"also":[33,183],"main":[35],"intermediate":[36,120],"stages":[37],"text":[39,52,82,156],"line,":[40,53,83,157],"word":[41,54,84,158],"and":[42,55,85,135,138,159],"character":[43,56,86,160],"segmentation.":[44],"For":[45],"purpose,":[47],"efficiently":[49],"create":[50],"segmentation":[57,87,121,161],"ground":[58,88,129,162],"truth":[59,89,130,163],"guided":[60],"by":[61],"transcription":[63],"documents.":[67],"The":[68,123],"proposed":[69,147],"consists":[71],"(i)":[73],"semiautomatic":[75],"procedure":[76,125],"detect":[80],"regions":[90,131],"making":[91],"use":[92],"correct":[95],"transcription,":[97],"(ii)":[98],"calculation":[99],"proper":[101],"evaluation":[102],"metrics":[103],"measure":[107],"OCR":[113,175],"as":[115,117],"well":[116],"stages.":[122],"semi-automatic":[124],"for":[126,154],"detecting":[127],"has":[132],"been":[133],"evaluated":[134],"proved":[136],"efficient":[137],"time":[139,152],"saving.":[140],"Experimental":[141],"results":[142],"prove":[143],"that":[144],"using":[145,172],"technique,":[148],"percentage":[150],"saved":[153],"creation":[164],"is":[165,182],"more":[166],"than":[167],"90%.":[168],"An":[169],"analytic":[170],"experiment":[171],"commercial":[174],"engine":[176],"applied":[177],"book":[181],"presented.":[184]},"counts_by_year":[{"year":2020,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2014,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
