{"id":"https://openalex.org/W2127581753","doi":"https://doi.org/10.1109/dial.2004.1263262","title":"Machine learning methods for automatically processing historical documents: from paper acquisition to XML transformation","display_name":"Machine learning methods for automatically processing historical documents: from paper acquisition to XML transformation","publication_year":2004,"publication_date":"2004-06-10","ids":{"openalex":"https://openalex.org/W2127581753","doi":"https://doi.org/10.1109/dial.2004.1263262","mag":"2127581753"},"language":"en","primary_location":{"id":"doi:10.1109/dial.2004.1263262","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dial.2004.1263262","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"First International Workshop on Document Image Analysis for Libraries, 2004. Proceedings.","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035635558","display_name":"Floriana Esposito","orcid":"https://orcid.org/0000-0002-1075-3239"},"institutions":[{"id":"https://openalex.org/I5561750","display_name":"University of Bari Aldo Moro","ror":"https://ror.org/027ynra39","country_code":"IT","type":"education","lineage":["https://openalex.org/I5561750"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"F. Esposito","raw_affiliation_strings":["Dipartimento di Informatica, Universit\u00e0 di Bari, Bari, Italy","Dipt. di Informatica, Bari Univ., Italy"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dipartimento di Informatica, Universit\u00e0 di Bari, Bari, Italy","institution_ids":["https://openalex.org/I5561750"]},{"raw_affiliation_string":"Dipt. di Informatica, Bari Univ., Italy","institution_ids":["https://openalex.org/I5561750"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035789137","display_name":"Donato Malerba","orcid":"https://orcid.org/0000-0001-8432-4608"},"institutions":[{"id":"https://openalex.org/I5561750","display_name":"University of Bari Aldo Moro","ror":"https://ror.org/027ynra39","country_code":"IT","type":"education","lineage":["https://openalex.org/I5561750"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"D. Malerba","raw_affiliation_strings":["Dipartimento di Informatica, Universit\u00e0 di Bari, Bari, Italy","Dipt. di Informatica, Bari Univ., Italy"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dipartimento di Informatica, Universit\u00e0 di Bari, Bari, Italy","institution_ids":["https://openalex.org/I5561750"]},{"raw_affiliation_string":"Dipt. di Informatica, Bari Univ., Italy","institution_ids":["https://openalex.org/I5561750"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059814300","display_name":"Giovanni Semeraro","orcid":"https://orcid.org/0000-0001-6883-1853"},"institutions":[{"id":"https://openalex.org/I5561750","display_name":"University of Bari Aldo Moro","ror":"https://ror.org/027ynra39","country_code":"IT","type":"education","lineage":["https://openalex.org/I5561750"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"G. Semeraro","raw_affiliation_strings":["Dipartimento di Informatica, Universit\u00e0 di Bari, Bari, Italy","Dipt. di Informatica, Bari Univ., Italy"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dipartimento di Informatica, Universit\u00e0 di Bari, Bari, Italy","institution_ids":["https://openalex.org/I5561750"]},{"raw_affiliation_string":"Dipt. di Informatica, Bari Univ., Italy","institution_ids":["https://openalex.org/I5561750"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109845985","display_name":"Stefano Ferilli","orcid":null},"institutions":[{"id":"https://openalex.org/I5561750","display_name":"University of Bari Aldo Moro","ror":"https://ror.org/027ynra39","country_code":"IT","type":"education","lineage":["https://openalex.org/I5561750"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"S. Ferilli","raw_affiliation_strings":["Dipartimento di Informatica, Universit\u00e0 di Bari, Bari, Italy","Dipt. di Informatica, Bari Univ., Italy"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dipartimento di Informatica, Universit\u00e0 di Bari, Bari, Italy","institution_ids":["https://openalex.org/I5561750"]},{"raw_affiliation_string":"Dipt. di Informatica, Bari Univ., Italy","institution_ids":["https://openalex.org/I5561750"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030643448","display_name":"O. Altamura","orcid":null},"institutions":[{"id":"https://openalex.org/I5561750","display_name":"University of Bari Aldo Moro","ror":"https://ror.org/027ynra39","country_code":"IT","type":"education","lineage":["https://openalex.org/I5561750"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"O. Altamura","raw_affiliation_strings":["Dipartimento di Informatica, Universit\u00e0 di Bari, Bari, Italy","Dipt. di Informatica, Bari Univ., Italy"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dipartimento di Informatica, Universit\u00e0 di Bari, Bari, Italy","institution_ids":["https://openalex.org/I5561750"]},{"raw_affiliation_string":"Dipt. di Informatica, Bari Univ., Italy","institution_ids":["https://openalex.org/I5561750"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073443868","display_name":"Teresa M. A. Basile","orcid":"https://orcid.org/0000-0003-4558-3249"},"institutions":[{"id":"https://openalex.org/I5561750","display_name":"University of Bari Aldo Moro","ror":"https://ror.org/027ynra39","country_code":"IT","type":"education","lineage":["https://openalex.org/I5561750"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"T.M.A. Basile","raw_affiliation_strings":["Dipartimento di Informatica, Universit\u00e0 di Bari, Bari, Italy","Dipt. di Informatica, Bari Univ., Italy"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dipartimento di Informatica, Universit\u00e0 di Bari, Bari, Italy","institution_ids":["https://openalex.org/I5561750"]},{"raw_affiliation_string":"Dipt. di Informatica, Bari Univ., Italy","institution_ids":["https://openalex.org/I5561750"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041745569","display_name":"Margherita Berardi","orcid":null},"institutions":[{"id":"https://openalex.org/I5561750","display_name":"University of Bari Aldo Moro","ror":"https://ror.org/027ynra39","country_code":"IT","type":"education","lineage":["https://openalex.org/I5561750"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"M. Berardi","raw_affiliation_strings":["Dipartimento di Informatica, Universit\u00e0 di Bari, Bari, Italy","Dipt. di Informatica, Bari Univ., Italy"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dipartimento di Informatica, Universit\u00e0 di Bari, Bari, Italy","institution_ids":["https://openalex.org/I5561750"]},{"raw_affiliation_string":"Dipt. di Informatica, Bari Univ., Italy","institution_ids":["https://openalex.org/I5561750"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073750332","display_name":"Michelangelo Ceci","orcid":"https://orcid.org/0000-0002-6690-7583"},"institutions":[{"id":"https://openalex.org/I5561750","display_name":"University of Bari Aldo Moro","ror":"https://ror.org/027ynra39","country_code":"IT","type":"education","lineage":["https://openalex.org/I5561750"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"M. Ceci","raw_affiliation_strings":["Dipartimento di Informatica, Universit\u00e0 di Bari, Bari, Italy","Dipt. di Informatica, Bari Univ., Italy"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dipartimento di Informatica, Universit\u00e0 di Bari, Bari, Italy","institution_ids":["https://openalex.org/I5561750"]},{"raw_affiliation_string":"Dipt. di Informatica, Bari Univ., Italy","institution_ids":["https://openalex.org/I5561750"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5047232379","display_name":"Nicola Di Mauro","orcid":"https://orcid.org/0000-0002-5858-1931"},"institutions":[{"id":"https://openalex.org/I5561750","display_name":"University of Bari Aldo Moro","ror":"https://ror.org/027ynra39","country_code":"IT","type":"education","lineage":["https://openalex.org/I5561750"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"N. Di Mauro","raw_affiliation_strings":["Dipartimento di Informatica, Universit\u00e0 di Bari, Bari, Italy","Dipt. di Informatica, Bari Univ., Italy"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dipartimento di Informatica, Universit\u00e0 di Bari, Bari, Italy","institution_ids":["https://openalex.org/I5561750"]},{"raw_affiliation_string":"Dipt. di Informatica, Bari Univ., Italy","institution_ids":["https://openalex.org/I5561750"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.6575,"has_fulltext":false,"cited_by_count":24,"citation_normalized_percentile":{"value":0.85379764,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"328","last_page":"335"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9901000261306763,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12377","display_name":"Digital Humanities and Scholarship","score":0.974399983882904,"subfield":{"id":"https://openalex.org/subfields/1208","display_name":"Literature and Literary Theory"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8421639204025269},{"id":"https://openalex.org/keywords/collaboratory","display_name":"Collaboratory","score":0.7550631761550903},{"id":"https://openalex.org/keywords/xml","display_name":"XML","score":0.7457825541496277},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6243977546691895},{"id":"https://openalex.org/keywords/transformation","display_name":"Transformation (genetics)","score":0.5023479461669922},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.49405553936958313},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.45990774035453796},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09543609619140625}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8421639204025269},{"id":"https://openalex.org/C2776787376","wikidata":"https://www.wikidata.org/wiki/Q5145870","display_name":"Collaboratory","level":2,"score":0.7550631761550903},{"id":"https://openalex.org/C8797682","wikidata":"https://www.wikidata.org/wiki/Q2115","display_name":"XML","level":2,"score":0.7457825541496277},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6243977546691895},{"id":"https://openalex.org/C204241405","wikidata":"https://www.wikidata.org/wiki/Q461499","display_name":"Transformation (genetics)","level":3,"score":0.5023479461669922},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.49405553936958313},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.45990774035453796},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09543609619140625},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/dial.2004.1263262","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dial.2004.1263262","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"First International Workshop on Document Image Analysis for Libraries, 2004. Proceedings.","raw_type":"proceedings-article"},{"id":"pmh:oai:ricerca.uniba.it:11586/127860","is_oa":false,"landing_page_url":"http://hdl.handle.net/11586/127860","pdf_url":null,"source":{"id":"https://openalex.org/S4377196296","display_name":"CINECA IRIS Institutional Research Information System (University of Bari Aldo Moro)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I5561750","host_organization_name":"University of Bari Aldo Moro","host_organization_lineage":["https://openalex.org/I5561750"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.5799999833106995}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W44107710","https://openalex.org/W111063460","https://openalex.org/W1487743140","https://openalex.org/W1488790962","https://openalex.org/W1502424784","https://openalex.org/W1541434994","https://openalex.org/W1983284692","https://openalex.org/W2015432638","https://openalex.org/W2115888011","https://openalex.org/W2120147695","https://openalex.org/W2135085165","https://openalex.org/W2142231912","https://openalex.org/W2163386034","https://openalex.org/W2167685423","https://openalex.org/W2526346152","https://openalex.org/W3023264106","https://openalex.org/W4206370914","https://openalex.org/W4285719527","https://openalex.org/W6601810314","https://openalex.org/W6604505951"],"related_works":["https://openalex.org/W4247240413","https://openalex.org/W2063389094","https://openalex.org/W2152394036","https://openalex.org/W3185091427","https://openalex.org/W2137575140","https://openalex.org/W2121423739","https://openalex.org/W1607822470","https://openalex.org/W4200615067","https://openalex.org/W2000959261","https://openalex.org/W2971928279"],"abstract_inverted_index":{"One":[0],"of":[1,4,30,76],"the":[2,5,28,74],"aims":[3],"EU":[6],"project":[7],"COLLATE":[8],"is":[9],"to":[10,53,90],"design":[11],"and":[12,20,37,95],"implement":[13],"a":[14,32,65,77,93],"Web-based":[15],"collaboratory":[16],"for":[17,45],"archives,":[18,41],"scientists":[19],"end-users":[21],"working":[22],"with":[23],"digitized":[24],"cultural":[25],"material.":[26],"Since":[27],"originals":[29],"such":[31,68,92],"material":[33],"are":[34],"often":[35],"unique":[36],"scattered":[38],"in":[39,88,100],"various":[40],"severe":[42],"problems":[43],"arise":[44],"their":[46],"wide":[47],"fruition.":[48],"A":[49],"solution":[50],"would":[51],"be":[52],"develop":[54],"intelligent":[55],"document":[56,78],"processing":[57,79],"tools":[58],"that":[59],"automatically":[60],"transform":[61],"printed":[62],"documents":[63],"into":[64],"Web-accessible":[66],"form":[67],"as":[69],"XML.":[70],"Here,":[71],"we":[72],"propose":[73],"use":[75],"system,":[80],"WISDOM++,":[81],"which":[82],"uses":[83],"heavily":[84],"machine":[85],"learning":[86],"techniques":[87],"order":[89],"perform":[91],"task,":[94],"report":[96],"promising":[97],"results":[98],"obtained":[99],"preliminary":[101],"experiments.":[102]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":1},{"year":2012,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2016-06-24T00:00:00"}
