{"id":"https://openalex.org/W2009268117","doi":"https://doi.org/10.1145/2494266.2494304","title":"Early modern OCR project (eMOP) at Texas A&amp;M University","display_name":"Early modern OCR project (eMOP) at Texas A&amp;M University","publication_year":2013,"publication_date":"2013-09-03","ids":{"openalex":"https://openalex.org/W2009268117","doi":"https://doi.org/10.1145/2494266.2494304","mag":"2009268117"},"language":"en","primary_location":{"id":"doi:10.1145/2494266.2494304","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2494266.2494304","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2013 ACM symposium on Document engineering","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5073169563","display_name":"Katayoun Torabi","orcid":"https://orcid.org/0000-0001-7692-1410"},"institutions":[{"id":"https://openalex.org/I91045830","display_name":"Texas A&M University","ror":"https://ror.org/01f5ytq51","country_code":"US","type":"education","lineage":["https://openalex.org/I91045830"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Katayoun Torabi","raw_affiliation_strings":["Texas A&amp;M University, College Station, TX, USA"],"affiliations":[{"raw_affiliation_string":"Texas A&amp;M University, College Station, TX, USA","institution_ids":["https://openalex.org/I91045830"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035081348","display_name":"Jessica Durgan","orcid":null},"institutions":[{"id":"https://openalex.org/I91045830","display_name":"Texas A&M University","ror":"https://ror.org/01f5ytq51","country_code":"US","type":"education","lineage":["https://openalex.org/I91045830"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jessica Durgan","raw_affiliation_strings":["Texas A&amp;M University, College Station, TX, USA"],"affiliations":[{"raw_affiliation_string":"Texas A&amp;M University, College Station, TX, USA","institution_ids":["https://openalex.org/I91045830"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5052376986","display_name":"Bryan Tarpley","orcid":null},"institutions":[{"id":"https://openalex.org/I91045830","display_name":"Texas A&M University","ror":"https://ror.org/01f5ytq51","country_code":"US","type":"education","lineage":["https://openalex.org/I91045830"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bryan Tarpley","raw_affiliation_strings":["Texas A&amp;M University, College Station, TX, USA"],"affiliations":[{"raw_affiliation_string":"Texas A&amp;M University, College Station, TX, USA","institution_ids":["https://openalex.org/I91045830"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5073169563"],"corresponding_institution_ids":["https://openalex.org/I91045830"],"apc_list":null,"apc_paid":null,"fwci":9.164,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.97476584,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"23","last_page":"26"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12377","display_name":"Digital Humanities and Scholarship","score":0.9588000178337097,"subfield":{"id":"https://openalex.org/subfields/1208","display_name":"Literature and Literary Theory"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12377","display_name":"Digital Humanities and Scholarship","score":0.9588000178337097,"subfield":{"id":"https://openalex.org/subfields/1208","display_name":"Literature and Literary Theory"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.926800012588501,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.91839998960495,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/optical-character-recognition","display_name":"Optical character recognition","score":0.7290699481964111},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6879422664642334},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.682004988193512},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.5309597849845886},{"id":"https://openalex.org/keywords/digital-library","display_name":"Digital library","score":0.4280293583869934},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.21799427270889282},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.14430350065231323}],"concepts":[{"id":"https://openalex.org/C546480517","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Optical character recognition","level":3,"score":0.7290699481964111},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6879422664642334},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.682004988193512},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.5309597849845886},{"id":"https://openalex.org/C513874922","wikidata":"https://www.wikidata.org/wiki/Q212805","display_name":"Digital library","level":3,"score":0.4280293583869934},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.21799427270889282},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.14430350065231323},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.0},{"id":"https://openalex.org/C164913051","wikidata":"https://www.wikidata.org/wiki/Q482","display_name":"Poetry","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2494266.2494304","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2494266.2494304","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2013 ACM symposium on Document engineering","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.6399999856948853}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":2,"referenced_works":["https://openalex.org/W1989737761","https://openalex.org/W2001642682"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W1981780420","https://openalex.org/W2182707996","https://openalex.org/W45233828","https://openalex.org/W2964988449","https://openalex.org/W2397952901","https://openalex.org/W2029380707","https://openalex.org/W4255934811","https://openalex.org/W2465382974"],"abstract_inverted_index":{"Great":[0],"effort":[1],"is":[2,207],"being":[3],"made":[4],"to":[5,47,86,90,98,106,129,138,142,176,190,216,236,239,253,265],"collect":[6],"and":[7,15,26,49,78,135,159,193,222,245,257,267],"preserve":[8,266],"historic":[9,52,92],"manuscripts":[10,56],"from":[11,132],"the":[12,20,64,71,74,100,125,133,152,167,170,195,220,240,242,250],"early":[13,163],"modern":[14,164],"eighteenth-century":[16],"periods;":[17],"unfortunately,":[18],"searching":[19],"Early":[21,65],"English":[22],"Books":[23],"Online":[24,30],"(EEBO)":[25],"Eighteenth":[27],"Century":[28],"Collections":[29],"(ECCO)":[31],"collections":[32,104,137,224],"can":[33],"be":[34],"extremely":[35],"difficult":[36],"for":[37,73,227,231],"researchers":[38],"because":[39],"current":[40],"Optical":[41],"Character":[42],"Recognition":[43],"(OCR)":[44],"engines":[45,89],"struggle":[46],"read":[48,91],"recognize":[50],"various":[51],"fonts,":[53],"especially":[54],"in":[55,96,111,150,259,262],"of":[57,102,127,209],"declining":[58],"quality.":[59],"To":[60],"address":[61],"this":[62,112],"problem,":[63],"Modern":[66],"OCR":[67,88,144],"Project":[68],"(eMOP)":[69],"at":[70,81,124],"Initiative":[72],"Digital":[75],"Humanities,":[76],"Media,":[77],"Culture":[79],"(IDHMC)":[80],"Texas":[82],"A&M":[83],"University":[84,126],"seeks":[85,235],"train":[87],"documents":[93,131],"more":[94,225],"effectively":[95],"order":[97],"make":[99,219,237],"entirety":[101],"these":[103,178],"accessible":[105,226],"searching.":[107],"The":[108],"first":[109],"step":[110],"project":[113,213,252],"involves":[114],"using":[115],"Aletheia":[116,171,206],"Desktop":[117],"Tool,":[118],"developed":[119,248],"by":[120],"PRImA":[121],"Research":[122],"Lab":[123],"Salford,":[128],"use":[130],"EEBO":[134,221],"ECCO":[136,223],"create":[139,177],"training":[140,180,201],"sets":[141],"aid":[143,254],"engines,":[145],"such":[146,155],"as":[147,156],"Google's":[148],"Tesseract,":[149],"recognizing":[151],"special":[153],"characters":[154],"ligatures,":[157],"italics,":[158],"blackletter":[160],"found":[161],"within":[162],"fonts.":[165],"In":[166],"year":[168],"that":[169,197,214],"team":[172],"has":[173],"been":[174],"working":[175],"font":[179],"libraries,":[181,255],"we":[182],"have":[183],"overcome":[184],"several":[185],"problems,":[186],"including":[187],"learning":[188],"how":[189],"select,":[191],"extract,":[192],"deliver":[194],"data":[196,228],"best":[198],"suits":[199],"Tesseract":[200],"requirements.":[202],"This":[203],"work":[204],"with":[205],"part":[208],"a":[210],"larger":[211],"scholarly":[212],"endeavors":[215],"not":[217],"only":[218],"mining":[229],"purposes":[230],"researchers,":[232],"but":[233],"also":[234],"available":[238],"public":[241],"methodologies,":[243],"workflow,":[244],"digital":[246],"tools":[247],"during":[249],"eMOP":[251],"museums,":[256],"scholars":[258],"other":[260],"fields":[261],"their":[263],"efforts":[264],"study":[268],"our":[269],"combined":[270],"cultural":[271],"history.":[272]},"counts_by_year":[{"year":2019,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
