{"id":"https://openalex.org/W4230543443","doi":"https://doi.org/10.1109/jcdl.2017.7991582","title":"Impact of OCR Errors on the Use of Digital Libraries: Towards a Better Access to Information","display_name":"Impact of OCR Errors on the Use of Digital Libraries: Towards a Better Access to Information","publication_year":2017,"publication_date":"2017-06-01","ids":{"openalex":"https://openalex.org/W4230543443","doi":"https://doi.org/10.1109/jcdl.2017.7991582"},"language":"en","primary_location":{"id":"doi:10.1109/jcdl.2017.7991582","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jcdl.2017.7991582","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 ACM/IEEE Joint Conference on Digital Libraries (JCDL)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://hal.science/hal-03025508","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030515939","display_name":"Guillaume Chiron","orcid":"https://orcid.org/0009-0004-3665-4900"},"institutions":[{"id":"https://openalex.org/I4210147660","display_name":"Biblioth\u00e8que Nationale de France","ror":"https://ror.org/04v1bf639","country_code":"FR","type":"archive","lineage":["https://openalex.org/I4210147660"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Guillaume Chiron","raw_affiliation_strings":["National Library of France, Quai Fran\u00e7ois Mauriac, Paris, France","BnF - Biblioth\u00e8que nationale de France (Quai Fran\u00e7ois Mauriac, 75706 Paris Cedex 13 - France)"],"affiliations":[{"raw_affiliation_string":"National Library of France, Quai Fran\u00e7ois Mauriac, Paris, France","institution_ids":["https://openalex.org/I4210147660"]},{"raw_affiliation_string":"BnF - Biblioth\u00e8que nationale de France (Quai Fran\u00e7ois Mauriac, 75706 Paris Cedex 13 - France)","institution_ids":["https://openalex.org/I4210147660"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033491986","display_name":"Antoine Doucet","orcid":"https://orcid.org/0000-0001-6160-3356"},"institutions":[{"id":"https://openalex.org/I78744979","display_name":"La Rochelle Universit\u00e9","ror":"https://ror.org/04mv1z119","country_code":"FR","type":"education","lineage":["https://openalex.org/I78744979"]},{"id":"https://openalex.org/I4403928318","display_name":"Laboratoire Informatique, Image et Interaction (L3i)","ror":"https://ror.org/03r7r0k94","country_code":null,"type":"facility","lineage":["https://openalex.org/I4403928318","https://openalex.org/I78744979"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Antoine Doucet","raw_affiliation_strings":["L3i Lab, University of la Rochelle, La Rochelle, France","L3I - Laboratoire Informatique, Image et Interaction - EA 2118 (B\u00e2timent Pascal Avenue Michel Cr\u00e9peau F-17042 La Rochelle Cedex 1 - France)"],"affiliations":[{"raw_affiliation_string":"L3i Lab, University of la Rochelle, La Rochelle, France","institution_ids":["https://openalex.org/I78744979"]},{"raw_affiliation_string":"L3I - Laboratoire Informatique, Image et Interaction - EA 2118 (B\u00e2timent Pascal Avenue Michel Cr\u00e9peau F-17042 La Rochelle Cedex 1 - France)","institution_ids":["https://openalex.org/I78744979","https://openalex.org/I4403928318"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063992777","display_name":"Micka\u00ebl Coustaty","orcid":"https://orcid.org/0000-0002-0123-439X"},"institutions":[{"id":"https://openalex.org/I4403928318","display_name":"Laboratoire Informatique, Image et Interaction (L3i)","ror":"https://ror.org/03r7r0k94","country_code":null,"type":"facility","lineage":["https://openalex.org/I4403928318","https://openalex.org/I78744979"]},{"id":"https://openalex.org/I78744979","display_name":"La Rochelle Universit\u00e9","ror":"https://ror.org/04mv1z119","country_code":"FR","type":"education","lineage":["https://openalex.org/I78744979"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Mickael Coustaty","raw_affiliation_strings":["L3i Lab, University of la Rochelle, La Rochelle, France","L3I - Laboratoire Informatique, Image et Interaction - EA 2118 (B\u00e2timent Pascal Avenue Michel Cr\u00e9peau F-17042 La Rochelle Cedex 1 - France)"],"affiliations":[{"raw_affiliation_string":"L3i Lab, University of la Rochelle, La Rochelle, France","institution_ids":["https://openalex.org/I78744979"]},{"raw_affiliation_string":"L3I - Laboratoire Informatique, Image et Interaction - EA 2118 (B\u00e2timent Pascal Avenue Michel Cr\u00e9peau F-17042 La Rochelle Cedex 1 - France)","institution_ids":["https://openalex.org/I78744979","https://openalex.org/I4403928318"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089473504","display_name":"Muriel Visani","orcid":"https://orcid.org/0000-0001-7513-4749"},"institutions":[{"id":"https://openalex.org/I4403928318","display_name":"Laboratoire Informatique, Image et Interaction (L3i)","ror":"https://ror.org/03r7r0k94","country_code":null,"type":"facility","lineage":["https://openalex.org/I4403928318","https://openalex.org/I78744979"]},{"id":"https://openalex.org/I78744979","display_name":"La Rochelle Universit\u00e9","ror":"https://ror.org/04mv1z119","country_code":"FR","type":"education","lineage":["https://openalex.org/I78744979"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Muriel Visani","raw_affiliation_strings":["L3i Lab, University of la Rochelle, La Rochelle, France","L3I - Laboratoire Informatique, Image et Interaction - EA 2118 (B\u00e2timent Pascal Avenue Michel Cr\u00e9peau F-17042 La Rochelle Cedex 1 - France)"],"affiliations":[{"raw_affiliation_string":"L3i Lab, University of la Rochelle, La Rochelle, France","institution_ids":["https://openalex.org/I78744979"]},{"raw_affiliation_string":"L3I - Laboratoire Informatique, Image et Interaction - EA 2118 (B\u00e2timent Pascal Avenue Michel Cr\u00e9peau F-17042 La Rochelle Cedex 1 - France)","institution_ids":["https://openalex.org/I78744979","https://openalex.org/I4403928318"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077271843","display_name":"Jean-Philippe Moreux","orcid":null},"institutions":[{"id":"https://openalex.org/I4210147660","display_name":"Biblioth\u00e8que Nationale de France","ror":"https://ror.org/04v1bf639","country_code":"FR","type":"archive","lineage":["https://openalex.org/I4210147660"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Jean-Philippe Moreux","raw_affiliation_strings":["National Library of France, Quai Fran\u00e7ois Mauriac, Paris, France","BnF - Biblioth\u00e8que nationale de France (Quai Fran\u00e7ois Mauriac, 75706 Paris Cedex 13 - France)"],"affiliations":[{"raw_affiliation_string":"National Library of France, Quai Fran\u00e7ois Mauriac, Paris, France","institution_ids":["https://openalex.org/I4210147660"]},{"raw_affiliation_string":"BnF - Biblioth\u00e8que nationale de France (Quai Fran\u00e7ois Mauriac, 75706 Paris Cedex 13 - France)","institution_ids":["https://openalex.org/I4210147660"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5030515939"],"corresponding_institution_ids":["https://openalex.org/I4210147660"],"apc_list":null,"apc_paid":null,"fwci":8.0651,"has_fulltext":false,"cited_by_count":57,"citation_normalized_percentile":{"value":0.97585527,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"4"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8032945394515991},{"id":"https://openalex.org/keywords/digital-library","display_name":"Digital library","score":0.7727534770965576},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.7287060618400574},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6206339597702026},{"id":"https://openalex.org/keywords/optical-character-recognition","display_name":"Optical character recognition","score":0.6078773140907288},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.5132391452789307},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.5108789801597595},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.46958914399147034},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.1996060311794281},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.07626619935035706},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.07429972290992737}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8032945394515991},{"id":"https://openalex.org/C513874922","wikidata":"https://www.wikidata.org/wiki/Q212805","display_name":"Digital library","level":3,"score":0.7727534770965576},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.7287060618400574},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6206339597702026},{"id":"https://openalex.org/C546480517","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Optical character recognition","level":3,"score":0.6078773140907288},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.5132391452789307},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.5108789801597595},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.46958914399147034},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.1996060311794281},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.07626619935035706},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.07429972290992737},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C164913051","wikidata":"https://www.wikidata.org/wiki/Q482","display_name":"Poetry","level":2,"score":0.0},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/jcdl.2017.7991582","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jcdl.2017.7991582","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 ACM/IEEE Joint Conference on Digital Libraries (JCDL)","raw_type":"proceedings-article"},{"id":"pmh:oai:HAL:hal-03025508v1","is_oa":true,"landing_page_url":"https://hal.science/hal-03025508","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"2017 ACM/IEEE Joint Conference on Digital Libraries (JCDL), Jun 2017, Toronto, Canada. &#x27E8;10.1109/JCDL.2017.7991582&#x27E9;","raw_type":"Conference papers"}],"best_oa_location":{"id":"pmh:oai:HAL:hal-03025508v1","is_oa":true,"landing_page_url":"https://hal.science/hal-03025508","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"2017 ACM/IEEE Joint Conference on Digital Libraries (JCDL), Jun 2017, Toronto, Canada. &#x27E8;10.1109/JCDL.2017.7991582&#x27E9;","raw_type":"Conference papers"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.7300000190734863,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W96336972","https://openalex.org/W104170146","https://openalex.org/W781702191","https://openalex.org/W1575759921","https://openalex.org/W1990871427","https://openalex.org/W2002006695","https://openalex.org/W2093931624","https://openalex.org/W2123831182","https://openalex.org/W2149551320","https://openalex.org/W2155381731","https://openalex.org/W2168005840","https://openalex.org/W2170037364","https://openalex.org/W2212003014","https://openalex.org/W2404228453","https://openalex.org/W2793452059","https://openalex.org/W2915199312","https://openalex.org/W6622703823","https://openalex.org/W6629926975","https://openalex.org/W6634441671","https://openalex.org/W6682716435"],"related_works":["https://openalex.org/W2032233321","https://openalex.org/W3121970507","https://openalex.org/W2110028391","https://openalex.org/W54497855","https://openalex.org/W217960748","https://openalex.org/W3125814499","https://openalex.org/W2090827041","https://openalex.org/W2565703248","https://openalex.org/W187246281","https://openalex.org/W2143548620"],"abstract_inverted_index":{"Digital":[0],"collections":[1],"are":[2,29,46],"increasingly":[3],"used":[4],"for":[5,58,93,188],"a":[6,77,154],"variety":[7],"of":[8,19,21,70,76,89,118,122,139,164,180],"purposes.":[9],"In":[10,61,106],"Europe":[11],"only,":[12],"we":[13,64,109,177],"can":[14],"conservatively":[15],"estimate":[16,67],"that":[17,193],"tens":[18],"thousands":[20],"users":[22],"consult":[23],"digital":[24,83,213],"libraries":[25],"daily.":[26],"The":[27,81],"usages":[28],"often":[30],"motivated":[31],"by":[32],"qualitative":[33],"and":[34,99,133,141],"quantitative":[35],"research.":[36],"However,":[37],"caution":[38],"must":[39],"be":[40],"advised":[41],"as":[42],"most":[43],"digitized":[44],"documents":[45,98,120],"indexed":[47],"through":[48],"their":[49],"OCRed":[50,97,119],"version,":[51],"which":[52,208],"is":[53,131],"far":[54],"from":[55,85],"perfect,":[56],"especially":[57],"ancient":[59],"documents.":[60,143],"this":[62,107,160],"paper,":[63],"aim":[65],"to":[66,153,170,184,200,207],"the":[68,74,86,127,166,171,190,204],"impact":[69],"OCR":[71,147,201,209],"errors":[72,148],"on":[73,146,212],"use":[75,163],"major":[78],"online":[79],"platform:":[80],"Gallica":[82,172],"library":[84,214],"National":[87],"Library":[88],"France.":[90],"It":[91],"accounts":[92],"more":[94],"than":[95],"100M":[96],"receives":[100],"80M":[101],"search":[102],"queries":[103,168],"every":[104],"year.":[105],"context,":[108],"introduce":[110],"two":[111],"main":[112],"contributions.":[113],"First,":[114],"an":[115,136,186],"original":[116],"corpus":[117],"composed":[121],"12M":[123],"characters":[124],"along":[125],"with":[126,135],"corresponding":[128],"gold":[129],"standard":[130],"presented":[132],"provided,":[134],"equal":[137],"share":[138],"English-":[140],"French-written":[142],"Next,":[144],"statistics":[145],"have":[149],"been":[150],"computed":[151],"thanks":[152],"novel":[155],"alignment":[156],"method":[157],"introduced":[158],"in":[159],"paper.":[161],"Making":[162],"all":[165],"user":[167],"submitted":[169],"portal":[173],"over":[174],"4":[175],"months,":[176],"take":[178],"advantage":[179],"our":[181],"error":[182],"model":[183],"propose":[185],"indicator":[187],"predicting":[189],"relative":[191],"risk":[192],"queried":[194],"terms":[195],"mismatch":[196],"targeted":[197],"resources":[198],"due":[199],"errors,":[202],"underlining":[203],"critical":[205],"extent":[206],"quality":[210],"impacts":[211],"access.":[215]},"counts_by_year":[{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":10},{"year":2022,"cited_by_count":8},{"year":2021,"cited_by_count":10},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":7},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":1}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2022-05-11T00:00:00"}
