{"id":"https://openalex.org/W2564757993","doi":"https://doi.org/10.18653/v1/w16-6108","title":"Low-resource OCR error detection and correction in French Clinical Texts","display_name":"Low-resource OCR error detection and correction in French Clinical Texts","publication_year":2016,"publication_date":"2016-01-01","ids":{"openalex":"https://openalex.org/W2564757993","doi":"https://doi.org/10.18653/v1/w16-6108","mag":"2564757993"},"language":"en","primary_location":{"id":"doi:10.18653/v1/w16-6108","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w16-6108","pdf_url":"https://www.aclweb.org/anthology/W16-6108.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Seventh International Workshop on Health Text\n          Mining and Information Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/W16-6108.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084625140","display_name":"Eva D\u2019Hondt","orcid":"https://orcid.org/0000-0001-5646-2261"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Eva D'hondt","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047002517","display_name":"Cyril Grouin","orcid":"https://orcid.org/0000-0001-5809-188X"},"institutions":[{"id":"https://openalex.org/I4210090571","display_name":"Institut des Sciences des Plantes de Paris Saclay","ror":"https://ror.org/00ajjta07","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I204730241","https://openalex.org/I277688954","https://openalex.org/I277688954","https://openalex.org/I4210088668","https://openalex.org/I4210090571","https://openalex.org/I88467170"]},{"id":"https://openalex.org/I277688954","display_name":"Universit\u00e9 Paris-Saclay","ror":"https://ror.org/03xjwb503","country_code":"FR","type":"education","lineage":["https://openalex.org/I277688954"]},{"id":"https://openalex.org/I4210115485","display_name":"Laboratoire d'Informatique pour la M\u00e9canique et les Sciences de l'Ing\u00e9nieur","ror":"https://ror.org/01raq4x89","country_code":"FR","type":"facility","lineage":["https://openalex.org/I102197404","https://openalex.org/I1294671590","https://openalex.org/I4210115485","https://openalex.org/I4210159245"]},{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1294671590"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Cyril Grouin","raw_affiliation_strings":["LIMSI, CNRS Universit Paris-Saclay F-91405 Orsay"],"affiliations":[{"raw_affiliation_string":"LIMSI, CNRS Universit Paris-Saclay F-91405 Orsay","institution_ids":["https://openalex.org/I277688954","https://openalex.org/I4210090571","https://openalex.org/I4210115485","https://openalex.org/I1294671590"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023203421","display_name":"Brigitte Grau","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Brigitte Grau","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5084625140"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.5229,"has_fulltext":true,"cited_by_count":8,"citation_normalized_percentile":{"value":0.72933852,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9509000182151794,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9509000182151794,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7419999837875366},{"id":"https://openalex.org/keywords/error-detection-and-correction","display_name":"Error detection and correction","score":0.68021160364151},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.5149460434913635},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.47962817549705505},{"id":"https://openalex.org/keywords/optical-character-recognition","display_name":"Optical character recognition","score":0.45726996660232544},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.44744354486465454},{"id":"https://openalex.org/keywords/error-analysis","display_name":"Error analysis","score":0.44203850626945496},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.38995710015296936},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.16007527709007263},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08274185657501221},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.08250656723976135}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7419999837875366},{"id":"https://openalex.org/C103088060","wikidata":"https://www.wikidata.org/wiki/Q1062839","display_name":"Error detection and correction","level":2,"score":0.68021160364151},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.5149460434913635},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.47962817549705505},{"id":"https://openalex.org/C546480517","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Optical character recognition","level":3,"score":0.45726996660232544},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44744354486465454},{"id":"https://openalex.org/C3018824978","wikidata":"https://www.wikidata.org/wiki/Q2894891","display_name":"Error analysis","level":2,"score":0.44203850626945496},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.38995710015296936},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.16007527709007263},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08274185657501221},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.08250656723976135},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.18653/v1/w16-6108","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w16-6108","pdf_url":"https://www.aclweb.org/anthology/W16-6108.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Seventh International Workshop on Health Text\n          Mining and Information Analysis","raw_type":"proceedings-article"},{"id":"pmh:oai:HAL:hal-01831225v1","is_oa":true,"landing_page_url":"https://hal.science/hal-01831225","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"International Workshop on Health Text Mining and Information Analysis, ACL, Nov 2016, Austin, United States. pp.61-68, &#x27E8;10.18653/v1/W16-6108&#x27E9;","raw_type":"Conference papers"}],"best_oa_location":{"id":"doi:10.18653/v1/w16-6108","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w16-6108","pdf_url":"https://www.aclweb.org/anthology/W16-6108.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Seventh International Workshop on Health Text\n          Mining and Information Analysis","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2675002076","display_name":"Contents and Knowledge aggregation for case-based reasoning in the field of f\u0153tal dysmorphology","funder_award_id":"ANR-12-CORD-0007","funder_id":"https://openalex.org/F4320320883","funder_display_name":"Agence Nationale de la Recherche"},{"id":"https://openalex.org/G6702880738","display_name":null,"funder_award_id":"ANR-12","funder_id":"https://openalex.org/F4320320883","funder_display_name":"Agence Nationale de la Recherche"}],"funders":[{"id":"https://openalex.org/F4320320883","display_name":"Agence Nationale de la Recherche","ror":"https://ror.org/00rbzpz17"},{"id":"https://openalex.org/F4320338463","display_name":"CHIST-ERA","ror":"https://ror.org/00rbzpz17"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2564757993.pdf","grobid_xml":"https://content.openalex.org/works/W2564757993.grobid-xml"},"referenced_works_count":16,"referenced_works":["https://openalex.org/W73263825","https://openalex.org/W647094781","https://openalex.org/W1503858070","https://openalex.org/W1594578975","https://openalex.org/W1810943226","https://openalex.org/W1990871427","https://openalex.org/W1995616412","https://openalex.org/W2006642610","https://openalex.org/W2010595692","https://openalex.org/W2014468488","https://openalex.org/W2047178861","https://openalex.org/W2060690769","https://openalex.org/W2095705004","https://openalex.org/W2186058360","https://openalex.org/W2288418054","https://openalex.org/W2440474652"],"related_works":["https://openalex.org/W2292997772","https://openalex.org/W170731741","https://openalex.org/W2055970094","https://openalex.org/W4377970376","https://openalex.org/W4318615658","https://openalex.org/W240009282","https://openalex.org/W2575782020","https://openalex.org/W2380243770","https://openalex.org/W2377495875","https://openalex.org/W2390973415"],"abstract_inverted_index":{"In":[0],"this":[1],"paper":[2],"we":[3,119],"present":[4],"a":[5,18,74,80,107,181,194],"simple":[6],"yet":[7],"effective":[8],"approach":[9],"to":[10,89,99,133,148],"automatic":[11],"OCR":[12,26,35,50,85,152],"error":[13,36],"detection":[14,37],"and":[15,38,128,135,176],"correction":[16,39],"on":[17,43,67,103,180],"corpus":[19,82],"of":[20,24,31,83,151,169],"French":[21],"clinical":[22],"reports":[23],"variable":[25],"quality":[27,86,153],"within":[28],"the":[29,64,116,123,140,149,173,189],"domain":[30],"foetopathology.":[32],"While":[33],"traditional":[34],"systems":[40],"rely":[41],"heavily":[42],"external":[44,159],"information":[45,52,102,160],"such":[46,161],"as":[47,129,131,162],"domain-specific":[48],"lexicons,":[49],"process":[51,118,175],"or":[53],"manually":[54,182],"corrected":[55,183],"training":[56,117,174],"material,":[57],"these":[58],"are":[59,146],"not":[60,156],"always":[61],"available":[62],"given":[63,108],"constraints":[65],"placed":[66],"using":[68],"medical":[69,109],"corpora.":[70],"We":[71,164,186],"therefore":[72],"propose":[73],"novel":[75],"method":[76,93],"that":[77,188],"only":[78],"needs":[79],"representative":[81],"acceptable":[84],"in":[87],"order":[88],"train":[90],"models.":[91],"Our":[92],"uses":[94],"recurrent":[95],"neural":[96],"networks":[97],"(RNNs)":[98],"model":[100,127],"sequential":[101],"character":[104],"level":[105],"for":[106],"text":[110],"corpus.":[111],"By":[112],"inserting":[113],"noise":[114,138,171],"during":[115],"can":[120],"simultaneously":[121],"learn":[122],"underlying":[124],"(character-level)":[125],"language":[126],"well":[130],"learning":[132],"detect":[134],"eliminate":[136],"random":[137],"from":[139],"textual":[141],"input.":[142],"The":[143],"resulting":[144],"models":[145,179],"robust":[147],"variability":[150],"but":[154],"do":[155],"require":[157],"additional,":[158],"lexicons.":[163],"compare":[165],"two":[166],"different":[167],"ways":[168],"injecting":[170],"into":[172],"evaluate":[177],"our":[178],"data":[184],"set.":[185],"find":[187],"best":[190],"performing":[191],"system":[192],"achieves":[193],"73%":[195],"accuracy.":[196]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":2}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
