{"id":"https://openalex.org/W2738953807","doi":"https://doi.org/10.1145/3078081.3078110","title":"OCR of a Mixed Corpus","display_name":"OCR of a Mixed Corpus","publication_year":2017,"publication_date":"2017-06-01","ids":{"openalex":"https://openalex.org/W2738953807","doi":"https://doi.org/10.1145/3078081.3078110","mag":"2738953807"},"language":"en","primary_location":{"id":"doi:10.1145/3078081.3078110","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3078081.3078110","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2nd International Conference on Digital Access to Textual Cultural Heritage","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021243727","display_name":"Manuel Ayuso","orcid":"https://orcid.org/0000-0001-5418-9986"},"institutions":[{"id":"https://openalex.org/I178450904","display_name":"National University of Distance Education","ror":"https://ror.org/02msb5n36","country_code":"ES","type":"education","lineage":["https://openalex.org/I178450904"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Manuel Ayuso","raw_affiliation_strings":["Universidad Nacional de Educaci\u00f3n a Distancia Proyecto BECLaR, Madrid, Spain"],"affiliations":[{"raw_affiliation_string":"Universidad Nacional de Educaci\u00f3n a Distancia Proyecto BECLaR, Madrid, Spain","institution_ids":["https://openalex.org/I178450904"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5021243727"],"corresponding_institution_ids":["https://openalex.org/I178450904"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.12775818,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"77","last_page":"82"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13523","display_name":"Mathematics, Computing, and Information Processing","score":0.9918000102043152,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13523","display_name":"Mathematics, Computing, and Information Processing","score":0.9918000102043152,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14330","display_name":"Library Science and Information Systems","score":0.9811999797821045,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12377","display_name":"Digital Humanities and Scholarship","score":0.9804999828338623,"subfield":{"id":"https://openalex.org/subfields/1208","display_name":"Literature and Literary Theory"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7346091270446777},{"id":"https://openalex.org/keywords/spelling","display_name":"Spelling","score":0.713372528553009},{"id":"https://openalex.org/keywords/digitization","display_name":"Digitization","score":0.7018823027610779},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.6404529213905334},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5702474117279053},{"id":"https://openalex.org/keywords/lemmatisation","display_name":"Lemmatisation","score":0.5329782962799072},{"id":"https://openalex.org/keywords/text-recognition","display_name":"Text recognition","score":0.5034400820732117},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4875311553478241},{"id":"https://openalex.org/keywords/optical-character-recognition","display_name":"Optical character recognition","score":0.4837944209575653},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4395022988319397},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.35208189487457275},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.31831687688827515}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7346091270446777},{"id":"https://openalex.org/C2777801307","wikidata":"https://www.wikidata.org/wiki/Q2088390","display_name":"Spelling","level":2,"score":0.713372528553009},{"id":"https://openalex.org/C2779308522","wikidata":"https://www.wikidata.org/wiki/Q843958","display_name":"Digitization","level":2,"score":0.7018823027610779},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.6404529213905334},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5702474117279053},{"id":"https://openalex.org/C161831844","wikidata":"https://www.wikidata.org/wiki/Q2554325","display_name":"Lemmatisation","level":2,"score":0.5329782962799072},{"id":"https://openalex.org/C2983812711","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Text recognition","level":3,"score":0.5034400820732117},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4875311553478241},{"id":"https://openalex.org/C546480517","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Optical character recognition","level":3,"score":0.4837944209575653},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4395022988319397},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.35208189487457275},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.31831687688827515},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3078081.3078110","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3078081.3078110","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2nd International Conference on Digital Access to Textual Cultural Heritage","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.800000011920929,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320323261","display_name":"Universidad Nacional de Educaci\u00f3n a Distancia","ror":"https://ror.org/02msb5n36"},{"id":"https://openalex.org/F4320325356","display_name":"Julius-Maximilians-Universit\u00e4t W\u00fcrzburg","ror":"https://ror.org/00fbnyb24"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":5,"referenced_works":["https://openalex.org/W2035342986","https://openalex.org/W2088483389","https://openalex.org/W2534524893","https://openalex.org/W2582326254","https://openalex.org/W2963414181"],"related_works":["https://openalex.org/W3163712264","https://openalex.org/W4306707571","https://openalex.org/W3179897446","https://openalex.org/W2970150031","https://openalex.org/W4309224528","https://openalex.org/W1562999394","https://openalex.org/W4392445496","https://openalex.org/W4362605793","https://openalex.org/W2980285880","https://openalex.org/W3211896491"],"abstract_inverted_index":{"This":[0],"paper":[1],"deals":[2],"with":[3,16,23,142],"the":[4,7,17,20,24,36,45,57,66,81,110,114,125,134,172],"application":[5],"of":[6,19,26,30,44,49,74,124],"digitization":[8],"methods":[9],"designed":[10],"by":[11,147,156],"LMU":[12],"CIS":[13,128],"team":[14],"and":[15,40,90,105,116,176,181,193],"encoding":[18],"data":[21],"obtained":[22],"aim":[25],"building":[27],"an":[28],"edition":[29,180],"a":[31,50,121],"Latin":[32,54],"author":[33],"based":[34],"on":[35,41],"first":[37,82],"printed":[38],"editions":[39],"two":[42,92],"manuscripts":[43],"text.":[46],"As":[47],"part":[48],"research":[51,76],"group":[52],"studying":[53],"authors":[55],"in":[56,69,178],"early":[58],"print":[59],"(BECLaR),":[60],"I":[61,96,132,183],"chose":[62],"Martianus":[63],"Capella":[64],"for":[65,112],"work":[67],"described":[68,146],"this":[70,75],"paper.":[71],"The":[72,140],"corpus":[73],"has":[77,152],"been":[78,153,162],"restricted":[79],"to":[80,88,91,119,165,186],"six":[83],"editions,":[84],"dating":[85],"from":[86],"1499":[87],"1599":[89],"related":[93],"manuscripts.":[94,166],"Firstly,":[95],"used":[97],"some":[98,195],"preprocessing":[99],"image":[100],"tools":[101],"as":[102,109,127,145],"Scantaylor,":[103],"GIMP,":[104],"others,":[106],"secondly":[107],"OCRopus":[108],"engine":[111],"OCR'ing":[113],"images":[115],"then":[117],"PoCoTo":[118],"do":[120],"batch":[122],"correction":[123],"results,":[126],"workshop":[129],"shows.":[130],"Finally,":[131],"labeled":[133],"encoded":[135],"text":[136,175],"using":[137,157],"Oxygenxml":[138],"editor.":[139],"difficulties":[141],"historical":[143],"spelling":[144],"Springmann":[148],"et":[149],"al.":[150],"[1]":[151],"mainly":[154],"solved":[155],"their":[158],"methods,":[159],"which":[160],"have":[161],"also":[163],"applied":[164],"A":[167],"new":[168],"issue":[169],"raised":[170],"is":[171],"delimitation":[173],"between":[174],"paratexts":[177],"every":[179],"manuscript.":[182],"will":[184],"try":[185],"explain":[187],"our":[188,191],"method,":[189],"justify":[190],"choices":[192],"show":[194],"results.":[196]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
