{"id":"https://openalex.org/W2288418054","doi":"https://doi.org/10.1109/digitalheritage.2015.7413829","title":"Customised OCR correction for historical medical text","display_name":"Customised OCR correction for historical medical text","publication_year":2015,"publication_date":"2015-09-01","ids":{"openalex":"https://openalex.org/W2288418054","doi":"https://doi.org/10.1109/digitalheritage.2015.7413829","mag":"2288418054"},"language":"en","primary_location":{"id":"doi:10.1109/digitalheritage.2015.7413829","is_oa":false,"landing_page_url":"https://doi.org/10.1109/digitalheritage.2015.7413829","pdf_url":null,"source":{"id":"https://openalex.org/S4306497942","display_name":"2015 Digital Heritage","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 Digital Heritage","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100606557","display_name":"Paul M. Thompson","orcid":"https://orcid.org/0000-0002-4720-8867"},"institutions":[{"id":"https://openalex.org/I28407311","display_name":"University of Manchester","ror":"https://ror.org/027m9bs27","country_code":"GB","type":"education","lineage":["https://openalex.org/I28407311"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Paul Thompson","raw_affiliation_strings":["National Centre for Text Mining, University of Manchester, Manchester, UK","National Centre for Text Mining, School of Computer Science, University of Manchester, UK"],"affiliations":[{"raw_affiliation_string":"National Centre for Text Mining, University of Manchester, Manchester, UK","institution_ids":["https://openalex.org/I28407311"]},{"raw_affiliation_string":"National Centre for Text Mining, School of Computer Science, University of Manchester, UK","institution_ids":["https://openalex.org/I28407311"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110478635","display_name":"John McNaught","orcid":null},"institutions":[{"id":"https://openalex.org/I28407311","display_name":"University of Manchester","ror":"https://ror.org/027m9bs27","country_code":"GB","type":"education","lineage":["https://openalex.org/I28407311"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"John McNaught","raw_affiliation_strings":["The University of Manchester, Manchester, Manchester, GB","National Centre for Text Mining, School of Computer Science, University of Manchester, UK"],"affiliations":[{"raw_affiliation_string":"The University of Manchester, Manchester, Manchester, GB","institution_ids":["https://openalex.org/I28407311"]},{"raw_affiliation_string":"National Centre for Text Mining, School of Computer Science, University of Manchester, UK","institution_ids":["https://openalex.org/I28407311"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077976343","display_name":"Sophia Ananiadou","orcid":"https://orcid.org/0000-0002-4097-9191"},"institutions":[{"id":"https://openalex.org/I28407311","display_name":"University of Manchester","ror":"https://ror.org/027m9bs27","country_code":"GB","type":"education","lineage":["https://openalex.org/I28407311"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Sophia Ananiadou","raw_affiliation_strings":["National Centre for Text Mining, University of Manchester, Manchester, UK","National Centre for Text Mining, School of Computer Science, University of Manchester, UK"],"affiliations":[{"raw_affiliation_string":"National Centre for Text Mining, University of Manchester, Manchester, UK","institution_ids":["https://openalex.org/I28407311"]},{"raw_affiliation_string":"National Centre for Text Mining, School of Computer Science, University of Manchester, UK","institution_ids":["https://openalex.org/I28407311"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100606557"],"corresponding_institution_ids":["https://openalex.org/I28407311"],"apc_list":null,"apc_paid":null,"fwci":1.6569,"has_fulltext":false,"cited_by_count":37,"citation_normalized_percentile":{"value":0.89591638,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"35","last_page":"42"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8346959352493286},{"id":"https://openalex.org/keywords/optical-character-recognition","display_name":"Optical character recognition","score":0.7890932559967041},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6055464148521423},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.5796704292297363},{"id":"https://openalex.org/keywords/spell","display_name":"Spell","score":0.5587810277938843},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.5176175832748413},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5024068355560303},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4780626595020294},{"id":"https://openalex.org/keywords/historical-document","display_name":"Historical document","score":0.477285772562027},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.4585544466972351},{"id":"https://openalex.org/keywords/subject","display_name":"Subject (documents)","score":0.4532202482223511},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.45056429505348206},{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.4155784845352173},{"id":"https://openalex.org/keywords/section","display_name":"Section (typography)","score":0.41347137093544006},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.27124565839767456},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.19056275486946106},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.13570639491081238}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8346959352493286},{"id":"https://openalex.org/C546480517","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Optical character recognition","level":3,"score":0.7890932559967041},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6055464148521423},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.5796704292297363},{"id":"https://openalex.org/C2780957641","wikidata":"https://www.wikidata.org/wiki/Q1999796","display_name":"Spell","level":2,"score":0.5587810277938843},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.5176175832748413},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5024068355560303},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4780626595020294},{"id":"https://openalex.org/C2778371909","wikidata":"https://www.wikidata.org/wiki/Q3771738","display_name":"Historical document","level":2,"score":0.477285772562027},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.4585544466972351},{"id":"https://openalex.org/C2777855551","wikidata":"https://www.wikidata.org/wiki/Q12310021","display_name":"Subject (documents)","level":2,"score":0.4532202482223511},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.45056429505348206},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.4155784845352173},{"id":"https://openalex.org/C2780129039","wikidata":"https://www.wikidata.org/wiki/Q1931107","display_name":"Section (typography)","level":2,"score":0.41347137093544006},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.27124565839767456},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.19056275486946106},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.13570639491081238},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/digitalheritage.2015.7413829","is_oa":false,"landing_page_url":"https://doi.org/10.1109/digitalheritage.2015.7413829","pdf_url":null,"source":{"id":"https://openalex.org/S4306497942","display_name":"2015 Digital Heritage","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 Digital Heritage","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.atira.dk:openaire_cris_publications/1a80e7c4-9239-4bc9-8fae-c2eb0382f781","is_oa":false,"landing_page_url":"https://research.manchester.ac.uk/en/publications/1a80e7c4-9239-4bc9-8fae-c2eb0382f781","pdf_url":null,"source":{"id":"https://openalex.org/S4306400662","display_name":"Research Explorer (The University of Manchester)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I28407311","host_organization_name":"University of Manchester","host_organization_lineage":["https://openalex.org/I28407311"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Thompson, P, McNaught, J & Ananiadou, S 2016, Customised OCR correction for historical medical text. in G Guidi, R Scopigno, J C Torres & H Graf (eds), 2015 Digital Heritage. vol. 1, IEEE, pp. 35-41, Digital Heritage 2015, Granada, Spain, 28/09/15. https://doi.org/10.1109/DigitalHeritage.2015.7413829","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:pure.atira.dk:publications/1a80e7c4-9239-4bc9-8fae-c2eb0382f781","is_oa":false,"landing_page_url":"https://www.research.manchester.ac.uk/portal/en/publications/customised-ocr-correction-for-historical-medical-text(1a80e7c4-9239-4bc9-8fae-c2eb0382f781).html","pdf_url":null,"source":{"id":"https://openalex.org/S4306400662","display_name":"Research Explorer (The University of Manchester)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I28407311","host_organization_name":"University of Manchester","host_organization_lineage":["https://openalex.org/I28407311"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Thompson, P, McNaught, J & Ananiadou, S 2016, Customised OCR correction for historical medical text. in G Guidi, R Scopigno, J C Torres & H Graf (eds), 2015 Digital Heritage. vol. 1, IEEE, pp. 35-41, Digital Heritage 2015, Granada, Spain, 28/09/15. https://doi.org/10.1109/DigitalHeritage.2015.7413829","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"No poverty","id":"https://metadata.un.org/sdg/1","score":0.4000000059604645}],"awards":[{"id":"https://openalex.org/G6249904315","display_name":null,"funder_award_id":"AH/L00982X/1","funder_id":"https://openalex.org/F4320334609","funder_display_name":"Arts and Humanities Research Council"}],"funders":[{"id":"https://openalex.org/F4320334609","display_name":"Arts and Humanities Research Council","ror":"https://ror.org/0505m1554"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W104170146","https://openalex.org/W1594578975","https://openalex.org/W1699166917","https://openalex.org/W1843650355","https://openalex.org/W1994086054","https://openalex.org/W2000447173","https://openalex.org/W2002006695","https://openalex.org/W2011084371","https://openalex.org/W2018840051","https://openalex.org/W2031966872","https://openalex.org/W2032223184","https://openalex.org/W2063420976","https://openalex.org/W2065635299","https://openalex.org/W2068971329","https://openalex.org/W2069172670","https://openalex.org/W2069189382","https://openalex.org/W2071003614","https://openalex.org/W2078365326","https://openalex.org/W2109368679","https://openalex.org/W2124990912","https://openalex.org/W2126686557","https://openalex.org/W2142268730","https://openalex.org/W2145088298","https://openalex.org/W2148395543","https://openalex.org/W2151268132","https://openalex.org/W2151504427","https://openalex.org/W2151956076","https://openalex.org/W2155381731","https://openalex.org/W2171950509","https://openalex.org/W2251942609","https://openalex.org/W2793452059","https://openalex.org/W2799678146","https://openalex.org/W2811491803","https://openalex.org/W3151019514","https://openalex.org/W4252354914","https://openalex.org/W6638901723","https://openalex.org/W6655087157","https://openalex.org/W6682438565","https://openalex.org/W6682716435","https://openalex.org/W6691451877","https://openalex.org/W6750849010","https://openalex.org/W6752932785","https://openalex.org/W7047636377"],"related_works":["https://openalex.org/W1997182898","https://openalex.org/W4253079800","https://openalex.org/W2784287639","https://openalex.org/W3023805750","https://openalex.org/W4294702218","https://openalex.org/W2119179626","https://openalex.org/W4308094881","https://openalex.org/W2038329042","https://openalex.org/W2067601176","https://openalex.org/W2131730163"],"abstract_inverted_index":{"Historical":[0],"text":[1,43,79,101],"archives":[2],"constitute":[3],"a":[4,45,108,127],"rich":[5],"and":[6,61,72],"diverse":[7],"source":[8],"of":[9,48,84,99,123,145,153,167],"information,":[10],"which":[11],"is":[12,24],"becoming":[13],"increasingly":[14],"readily":[15],"accessible,":[16],"owing":[17],"to":[18,34,56,148,158,172],"large-scale":[19],"digitisation":[20],"efforts.":[21],"Searchable":[22],"access":[23],"typically":[25,54],"provided":[26],"by":[27,135,170],"applying":[28],"Optical":[29],"Character":[30],"Recognition":[31],"(OCR)":[32],"software":[33],"scanned":[35],"page":[36],"images.":[37],"Often,":[38],"however,":[39],"the":[40,82,97,142,146],"automatically":[41],"recognised":[42],"contains":[44],"large":[46],"number":[47],"errors,":[49],"since":[50],"OCR":[51,78,110,160],"systems":[52,86,92],"are":[53,94,133],"optimised":[55],"deal":[57],"with":[58,64,126],"modern":[59],"documents,":[60],"can":[62,80],"struggle":[63],"historical":[65,88,115],"document":[66],"features,":[67],"including":[68],"variable":[69],"print":[70],"characteristics":[71],"archaic":[73],"vocabulary":[74],"usage.":[75],"Low":[76],"quality":[77],"reduce":[81],"efficiency":[83],"search":[85],"over":[87],"archives,":[89],"particularly":[90],"semantic":[91],"that":[93],"based":[95],"on":[96,107],"application":[98],"sophisticated":[100],"mining":[102],"(TM)":[103],"techniques.":[104],"We":[105],"report":[106],"new":[109],"correction":[111,122],"strategy,":[112,130],"customised":[113],"for":[114],"medical":[116],"documents.":[117],"The":[118,151],"method":[119,155],"combines":[120],"rule-based":[121],"regular":[124],"errors":[125],"medically-tuned":[128],"spell-checking":[129],"whose":[131],"corrections":[132],"guided":[134],"information":[136],"about":[137],"subject-specific":[138],"language":[139],"usage":[140],"from":[141],"publication":[143],"period":[144],"article":[147],"be":[149],"corrected.":[150],"performance":[152],"our":[154],"compares":[156],"favourably":[157],"other":[159],"post-correction":[161],"strategies,":[162],"in":[163],"improving":[164],"word-level":[165],"accuracy":[166],"poor-quality":[168],"documents":[169],"up":[171],"16%.":[173]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":7},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":6},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":5}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
