{"id":"https://openalex.org/W2999622538","doi":"https://doi.org/10.1108/ajim-07-2019-0189","title":"Toward the optimized crowdsourcing strategy for OCR post-correction","display_name":"Toward the optimized crowdsourcing strategy for OCR post-correction","publication_year":2019,"publication_date":"2019-12-09","ids":{"openalex":"https://openalex.org/W2999622538","doi":"https://doi.org/10.1108/ajim-07-2019-0189","mag":"2999622538"},"language":"en","primary_location":{"id":"doi:10.1108/ajim-07-2019-0189","is_oa":false,"landing_page_url":"https://doi.org/10.1108/ajim-07-2019-0189","pdf_url":null,"source":{"id":"https://openalex.org/S4210181081","display_name":"Aslib Journal of Information Management","issn_l":"2050-3806","issn":["2050-3806","2050-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319811","host_organization_name":"Emerald Publishing Limited","host_organization_lineage":["https://openalex.org/P4310319811"],"host_organization_lineage_names":["Emerald Publishing Limited"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Aslib Journal of Information Management","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2106.06831","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Omri Suissa","orcid":null},"institutions":[{"id":"https://openalex.org/I13955877","display_name":"Bar-Ilan University","ror":"https://ror.org/03kgsv495","country_code":"IL","type":"education","lineage":["https://openalex.org/I13955877"]}],"countries":["IL"],"is_corresponding":true,"raw_author_name":"Omri Suissa","raw_affiliation_strings":["Bar-Ilan University, Ramat Gan, Israel"],"affiliations":[{"raw_affiliation_string":"Bar-Ilan University, Ramat Gan, Israel","institution_ids":["https://openalex.org/I13955877"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Avshalom Elmalech","orcid":null},"institutions":[{"id":"https://openalex.org/I13955877","display_name":"Bar-Ilan University","ror":"https://ror.org/03kgsv495","country_code":"IL","type":"education","lineage":["https://openalex.org/I13955877"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Avshalom Elmalech","raw_affiliation_strings":["Bar-Ilan University, Ramat Gan, Israel"],"affiliations":[{"raw_affiliation_string":"Bar-Ilan University, Ramat Gan, Israel","institution_ids":["https://openalex.org/I13955877"]}]},{"author_position":"last","author":{"id":null,"display_name":"Maayan Zhitomirsky-Geffet","orcid":null},"institutions":[{"id":"https://openalex.org/I13955877","display_name":"Bar-Ilan University","ror":"https://ror.org/03kgsv495","country_code":"IL","type":"education","lineage":["https://openalex.org/I13955877"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Maayan Zhitomirsky-Geffet","raw_affiliation_strings":["Bar-Ilan University, Ramat Gan, Israel"],"affiliations":[{"raw_affiliation_string":"Bar-Ilan University, Ramat Gan, Israel","institution_ids":["https://openalex.org/I13955877"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I13955877"],"apc_list":null,"apc_paid":null,"fwci":0.3065,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.62549996,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"72","issue":"2","first_page":"179","last_page":"197"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.5763999819755554,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.5763999819755554,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.07419999688863754,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10789","display_name":"Interactive and Immersive Displays","score":0.054499998688697815,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/crowdsourcing","display_name":"Crowdsourcing","score":0.941100001335144},{"id":"https://openalex.org/keywords/optical-character-recognition","display_name":"Optical character recognition","score":0.8079000115394592},{"id":"https://openalex.org/keywords/digitization","display_name":"Digitization","score":0.7860999703407288},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6858999729156494},{"id":"https://openalex.org/keywords/document-processing","display_name":"Document processing","score":0.3483999967575073},{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.33079999685287476}],"concepts":[{"id":"https://openalex.org/C62230096","wikidata":"https://www.wikidata.org/wiki/Q275969","display_name":"Crowdsourcing","level":2,"score":0.941100001335144},{"id":"https://openalex.org/C546480517","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Optical character recognition","level":3,"score":0.8079000115394592},{"id":"https://openalex.org/C2779308522","wikidata":"https://www.wikidata.org/wiki/Q843958","display_name":"Digitization","level":2,"score":0.7860999703407288},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7493000030517578},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6858999729156494},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49050000309944153},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.47450000047683716},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4293999969959259},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3578999936580658},{"id":"https://openalex.org/C67905146","wikidata":"https://www.wikidata.org/wiki/Q5287646","display_name":"Document processing","level":2,"score":0.3483999967575073},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.33079999685287476},{"id":"https://openalex.org/C3018824978","wikidata":"https://www.wikidata.org/wiki/Q2894891","display_name":"Error analysis","level":2,"score":0.3255000114440918},{"id":"https://openalex.org/C115901376","wikidata":"https://www.wikidata.org/wiki/Q184199","display_name":"Automation","level":2,"score":0.3124000132083893},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.30550000071525574},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.25780001282691956},{"id":"https://openalex.org/C103088060","wikidata":"https://www.wikidata.org/wiki/Q1062839","display_name":"Error detection and correction","level":2,"score":0.2531999945640564},{"id":"https://openalex.org/C68699486","wikidata":"https://www.wikidata.org/wiki/Q265904","display_name":"Document Structure Description","level":3,"score":0.2502000033855438}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1108/ajim-07-2019-0189","is_oa":false,"landing_page_url":"https://doi.org/10.1108/ajim-07-2019-0189","pdf_url":null,"source":{"id":"https://openalex.org/S4210181081","display_name":"Aslib Journal of Information Management","issn_l":"2050-3806","issn":["2050-3806","2050-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319811","host_organization_name":"Emerald Publishing Limited","host_organization_lineage":["https://openalex.org/P4310319811"],"host_organization_lineage_names":["Emerald Publishing Limited"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Aslib Journal of Information Management","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2106.06831","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2106.06831","pdf_url":"https://arxiv.org/pdf/2106.06831","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2106.06831","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2106.06831","pdf_url":"https://arxiv.org/pdf/2106.06831","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W99399284","https://openalex.org/W159136832","https://openalex.org/W291753371","https://openalex.org/W1525923969","https://openalex.org/W1594578975","https://openalex.org/W1647671624","https://openalex.org/W1699166917","https://openalex.org/W2022710553","https://openalex.org/W2040062114","https://openalex.org/W2041411104","https://openalex.org/W2047260110","https://openalex.org/W2074231493","https://openalex.org/W2081366726","https://openalex.org/W2103001613","https://openalex.org/W2106568252","https://openalex.org/W2113792477","https://openalex.org/W2138247806","https://openalex.org/W2141708418","https://openalex.org/W2149489787","https://openalex.org/W2151401338","https://openalex.org/W2169800142","https://openalex.org/W2171965515","https://openalex.org/W2182992846","https://openalex.org/W2246237795","https://openalex.org/W2246716434","https://openalex.org/W2337260734","https://openalex.org/W2339954278","https://openalex.org/W2568187431","https://openalex.org/W2575366139","https://openalex.org/W2783946743","https://openalex.org/W2792430722","https://openalex.org/W3123895079","https://openalex.org/W3124878131","https://openalex.org/W6607080108","https://openalex.org/W6948190162"],"related_works":[],"abstract_inverted_index":{"Purpose":[0],"Digitization":[1],"of":[2,41,44,57,95,145,158,170,236],"historical":[3,45,73,125,219],"documents":[4,23,46],"is":[5,19,47,60,80,151,161,227],"a":[6,123,165],"challenging":[7],"task":[8,204],"in":[9,72,84,122,143,181],"many":[10],"digital":[11],"humanities":[12],"projects.":[13],"A":[14,93],"popular":[15],"approach":[16],"for":[17,88,205,221,248],"digitization":[18],"to":[20,61,68,118,195,199,215,231],"scan":[21],"the":[22,39,81,110,129,147,155,159,172,182,201,228,234],"into":[24,30],"images,":[25],"and":[26,50,76,87,101,133,154,243],"then":[27],"convert":[28],"images":[29],"text":[31,74,102,149,180],"using":[32,178],"optical":[33],"character":[34],"recognition":[35],"(OCR)":[36],"algorithms.":[37],"However,":[38],"outcome":[40],"OCR":[42,70,120,206,223,241],"processing":[43],"usually":[48],"inaccurate":[49],"requires":[51],"post-processing":[52],"error":[53],"correction.":[54],"The":[55,115,139,190,208],"purpose":[56],"this":[58,249],"paper":[59],"investigate":[62,233],"how":[63,198],"crowdsourcing":[64,78,203],"can":[65,211],"be":[66,213],"utilized":[67,214],"correct":[69],"errors":[71,121],"collections,":[75],"which":[77],"methodology":[79,210],"most":[82],"effective":[83],"different":[85,98],"scenarios":[86],"various":[89,237],"research":[90],"objectives.":[91],"Design/methodology/approach":[92],"series":[94],"experiments":[96],"with":[97,106,164,185],"micro-task\u2019s":[99],"structures":[100],"lengths":[103],"were":[104,136,175],"conducted":[105],"753":[107],"workers":[108,116],"on":[109,197,239],"Amazon\u2019s":[111],"Mechanical":[112],"Turk":[113],"platform.":[114],"had":[117],"fix":[119],"selected":[124],"text.":[126],"To":[127],"analyze":[128],"results,":[130],"new":[131],"accuracy":[132],"efficiency":[134],"measures":[135],"devised.":[137],"Findings":[138],"analysis":[140],"suggests":[141],"that":[142],"terms":[144,169],"accuracy,":[146],"optimal":[148,156,202,246],"length":[150],"medium":[152],"(paragraph-size)":[153],"structure":[157,184],"experiment":[160],"two":[162],"phase":[163],"scanned":[166],"image.":[167,187],"In":[168],"efficiency,":[171],"best":[173],"results":[174],"obtained":[176],"when":[177],"longer":[179],"single-stage":[183],"no":[186],"Practical":[188],"implications":[189],"study":[191],"provides":[192],"practical":[193],"recommendations":[194],"researchers":[196],"build":[200],"post-correction.":[207,224],"developed":[209],"also":[212],"create":[216],"golden":[217],"standard":[218],"texts":[220],"automatic":[222],"Originality/value":[225],"This":[226],"first":[229],"attempt":[230],"systematically":[232],"influence":[235],"factors":[238],"crowdsourcing-based":[240],"post-correction":[242],"propose":[244],"an":[245],"strategy":[247],"process.":[250]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2020-01-23T00:00:00"}
