{"id":"https://openalex.org/W2737650961","doi":"https://doi.org/10.1145/3078081.3078103","title":"Clear-cut methodology for Arabic OCR and post-correction with low technical skilled annotators","display_name":"Clear-cut methodology for Arabic OCR and post-correction with low technical skilled annotators","publication_year":2017,"publication_date":"2017-06-01","ids":{"openalex":"https://openalex.org/W2737650961","doi":"https://doi.org/10.1145/3078081.3078103","mag":"2737650961"},"language":"en","primary_location":{"id":"doi:10.1145/3078081.3078103","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3078081.3078103","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2nd International Conference on Digital Access to Textual Cultural Heritage","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5029539785","display_name":"Alicia Gonz\u00e1lez Mart\u00ednez","orcid":null},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Alicia Gonz\u00e1lez Mart\u00ednez","raw_affiliation_strings":["Universit\u00e4t Hamburg, ERC-Project COBHUNI, Edmund-Siemers, Hamburg"],"affiliations":[{"raw_affiliation_string":"Universit\u00e4t Hamburg, ERC-Project COBHUNI, Edmund-Siemers, Hamburg","institution_ids":["https://openalex.org/I159176309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049603707","display_name":"Tillmann Feige","orcid":null},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Tillmann Feige","raw_affiliation_strings":["Universit\u00e4t Hamburg, ERC-Project COBHUNI, Edmund-Siemers, Hamburg"],"affiliations":[{"raw_affiliation_string":"Universit\u00e4t Hamburg, ERC-Project COBHUNI, Edmund-Siemers, Hamburg","institution_ids":["https://openalex.org/I159176309"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5025654164","display_name":"Thomas Eich","orcid":null},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Thomas Eich","raw_affiliation_strings":["Universit\u00e4t Hamburg, ERC-Project COBHUNI, Edmund-Siemers, Hamburg"],"affiliations":[{"raw_affiliation_string":"Universit\u00e4t Hamburg, ERC-Project COBHUNI, Edmund-Siemers, Hamburg","institution_ids":["https://openalex.org/I159176309"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5029539785"],"corresponding_institution_ids":["https://openalex.org/I159176309"],"apc_list":null,"apc_paid":null,"fwci":0.4542,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.65310475,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"67","last_page":"70"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13523","display_name":"Mathematics, Computing, and Information Processing","score":0.9746000170707703,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13523","display_name":"Mathematics, Computing, and Information Processing","score":0.9746000170707703,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9448999762535095,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8721396923065186},{"id":"https://openalex.org/keywords/optical-character-recognition","display_name":"Optical character recognition","score":0.7147108912467957},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6465297341346741},{"id":"https://openalex.org/keywords/usability","display_name":"Usability","score":0.5562604665756226},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5546141862869263},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.554575502872467},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5066760182380676},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4772234261035919},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.41007667779922485},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3271061182022095},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.2202972173690796},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.16460981965065002},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.12208342552185059}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8721396923065186},{"id":"https://openalex.org/C546480517","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Optical character recognition","level":3,"score":0.7147108912467957},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6465297341346741},{"id":"https://openalex.org/C170130773","wikidata":"https://www.wikidata.org/wiki/Q216378","display_name":"Usability","level":2,"score":0.5562604665756226},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5546141862869263},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.554575502872467},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5066760182380676},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4772234261035919},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.41007667779922485},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3271061182022095},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2202972173690796},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.16460981965065002},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.12208342552185059},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3078081.3078103","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3078081.3078103","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2nd International Conference on Digital Access to Textual Cultural Heritage","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6800000071525574,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":3,"referenced_works":["https://openalex.org/W609568101","https://openalex.org/W2339562889","https://openalex.org/W2963837629"],"related_works":["https://openalex.org/W4389670110","https://openalex.org/W2429057255","https://openalex.org/W2187546663","https://openalex.org/W148745890","https://openalex.org/W2611942503","https://openalex.org/W4315621326","https://openalex.org/W2899790217","https://openalex.org/W2280422768","https://openalex.org/W3143197806","https://openalex.org/W1576092969"],"abstract_inverted_index":{"This":[0],"paper":[1],"describes":[2],"an":[3,130,196],"efficient":[4],"and":[5,10,108,119,132,139,184,212,215],"straightforward":[6],"methodology":[7,207],"for":[8,19,42,126,135],"OCR-ing":[9,214],"post-correcting":[11],"Arabic":[12,36],"text":[13],"material":[14,85],"on":[15,50,88,200],"Islamic":[16],"embryology":[17],"collected":[18],"the":[20,24,28,35,38,44,74,84,93,112,117,120,137,143,161,168,173,193,210],"COBHUNI":[21],"project.":[22],"As":[23],"target":[25,75],"texts":[26,76],"of":[27,34,40,73,83,102,106,116,158,172,195,205,217,220],"project":[29],"include":[30],"diverse":[31],"diachronic":[32],"stages":[33],"language,":[37],"team":[39],"annotators":[41,118],"performing":[43,136],"OCR":[45,94,138,144,151],"post-correction":[46,140,162,187,216],"requires":[47],"well-trained":[48],"experts":[49,62],"language":[51,61],"skills.":[52],"While":[53],"technical":[54,66,114],"skills":[55,115],"are":[56],"also":[57],"desirable,":[58],"highly":[59],"trained":[60],"typically":[63],"lack":[64],"enough":[65],"knowledge.":[67],"Furthermore,":[68],"a":[69,99,179,218],"relatively":[70],"small":[71,100],"portion":[72],"needed":[77],"to":[78,128,166],"be":[79],"OCR-ed,":[80],"as":[81,176],"most":[82],"was":[86,189],"already":[87],"some":[89],"digital":[90],"form.":[91],"Thus,":[92],"task":[95,188],"could":[96],"only":[97],"require":[98],"amount":[101],"resources":[103],"in":[104,209],"terms":[105],"time":[107],"work":[109],"complexity.":[110],"Both":[111],"low":[113],"resource":[121],"constraints":[122],"made":[123],"it":[124,154,177],"necessary":[125],"us":[127],"find":[129],"easy-to-develop":[131],"suitable":[133],"workflow":[134],"tasks.":[141],"For":[142,160],"phase,":[145,163],"we":[146,164],"chose":[147],"Tesseract":[148],"Open":[149],"Source":[150],"Engine,":[152],"because":[153],"achieves":[155],"state-of-the-art":[156],"levels":[157],"accuracy.":[159],"decided":[165],"use":[167],"Proofread":[169],"Page":[170],"extension":[171],"MediaWiki":[174],"software,":[175],"strikes":[178],"perfect":[180],"balance":[181],"between":[182],"usability":[183],"efficiency.":[185],"The":[186,203],"additionally":[190],"supported":[191],"by":[192],"implementation":[194],"error":[197],"checker":[198],"based":[199],"simple":[201],"heuristics.":[202],"application":[204],"this":[206],"resulted":[208],"successful":[211],"fast":[213],"corpus":[219],"36,132":[221],"tokens.":[222]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2019,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
