{"id":"https://openalex.org/W2172204930","doi":"https://doi.org/10.1093/llc/fqt007","title":"Automatic extraction of catalog data from digital images of historical manuscripts","display_name":"Automatic extraction of catalog data from digital images of historical manuscripts","publication_year":2013,"publication_date":"2013-02-23","ids":{"openalex":"https://openalex.org/W2172204930","doi":"https://doi.org/10.1093/llc/fqt007","mag":"2172204930"},"language":"en","primary_location":{"id":"doi:10.1093/llc/fqt007","is_oa":false,"landing_page_url":"https://doi.org/10.1093/llc/fqt007","pdf_url":null,"source":{"id":"https://openalex.org/S84784070","display_name":"Literary and Linguistic Computing","issn_l":"0268-1145","issn":["0268-1145","1477-4615"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Literary and Linguistic Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5069152551","display_name":"Roni Shweka","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"R. Shweka","raw_affiliation_strings":["The Friedberg Genizah Project, Jerusalem, Israel"],"affiliations":[{"raw_affiliation_string":"The Friedberg Genizah Project, Jerusalem, Israel","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002775368","display_name":"Yaacov Choueka","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Y. Choueka","raw_affiliation_strings":["The Friedberg Genizah Project, Jerusalem, Israel"],"affiliations":[{"raw_affiliation_string":"The Friedberg Genizah Project, Jerusalem, Israel","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078102229","display_name":"Lior Wolf","orcid":"https://orcid.org/0000-0001-5578-8892"},"institutions":[{"id":"https://openalex.org/I16391192","display_name":"Tel Aviv University","ror":"https://ror.org/04mhzgx49","country_code":"IL","type":"education","lineage":["https://openalex.org/I16391192"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"L. Wolf","raw_affiliation_strings":["The Blavatnik School of Computer Science, Tel Aviv University, Ramat Aviv, Israel"],"affiliations":[{"raw_affiliation_string":"The Blavatnik School of Computer Science, Tel Aviv University, Ramat Aviv, Israel","institution_ids":["https://openalex.org/I16391192"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039993352","display_name":"Nachum Dershowitz","orcid":"https://orcid.org/0000-0003-0363-2735"},"institutions":[{"id":"https://openalex.org/I16391192","display_name":"Tel Aviv University","ror":"https://ror.org/04mhzgx49","country_code":"IL","type":"education","lineage":["https://openalex.org/I16391192"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"N. Dershowitz","raw_affiliation_strings":["The Blavatnik School of Computer Science, Tel Aviv University, Ramat Aviv, Israel"],"affiliations":[{"raw_affiliation_string":"The Blavatnik School of Computer Science, Tel Aviv University, Ramat Aviv, Israel","institution_ids":["https://openalex.org/I16391192"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5069152551"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.5529,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.73202309,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"28","issue":"2","first_page":"315","last_page":"330"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/genizah","display_name":"Genizah","score":0.9754288196563721},{"id":"https://openalex.org/keywords/tel-aviv","display_name":"Tel aviv","score":0.9355145692825317},{"id":"https://openalex.org/keywords/library-science","display_name":"Library science","score":0.4362131953239441},{"id":"https://openalex.org/keywords/history","display_name":"History","score":0.4188273847103119},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.3395598232746124},{"id":"https://openalex.org/keywords/art","display_name":"Art","score":0.3345484733581543},{"id":"https://openalex.org/keywords/art-history","display_name":"Art history","score":0.3332173228263855},{"id":"https://openalex.org/keywords/classics","display_name":"Classics","score":0.3218095898628235},{"id":"https://openalex.org/keywords/judaism","display_name":"Judaism","score":0.24904054403305054},{"id":"https://openalex.org/keywords/archaeology","display_name":"Archaeology","score":0.1600724458694458}],"concepts":[{"id":"https://openalex.org/C109901321","wikidata":"https://www.wikidata.org/wiki/Q855266","display_name":"Genizah","level":3,"score":0.9754288196563721},{"id":"https://openalex.org/C3020510925","wikidata":"https://www.wikidata.org/wiki/Q33935","display_name":"Tel aviv","level":2,"score":0.9355145692825317},{"id":"https://openalex.org/C161191863","wikidata":"https://www.wikidata.org/wiki/Q199655","display_name":"Library science","level":1,"score":0.4362131953239441},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.4188273847103119},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3395598232746124},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.3345484733581543},{"id":"https://openalex.org/C52119013","wikidata":"https://www.wikidata.org/wiki/Q50637","display_name":"Art history","level":1,"score":0.3332173228263855},{"id":"https://openalex.org/C74916050","wikidata":"https://www.wikidata.org/wiki/Q841090","display_name":"Classics","level":1,"score":0.3218095898628235},{"id":"https://openalex.org/C150152722","wikidata":"https://www.wikidata.org/wiki/Q9268","display_name":"Judaism","level":2,"score":0.24904054403305054},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.1600724458694458}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1093/llc/fqt007","is_oa":false,"landing_page_url":"https://doi.org/10.1093/llc/fqt007","pdf_url":null,"source":{"id":"https://openalex.org/S84784070","display_name":"Literary and Linguistic Computing","issn_l":"0268-1145","issn":["0268-1145","1477-4615"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Literary and Linguistic Computing","raw_type":"journal-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.228.3019","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.228.3019","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cs.tau.ac.il/%7Enachumd/papers/AutomaticExtraction.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.404.3992","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.404.3992","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cs.tau.ac.il/~nachumd/papers/AutomaticExtraction.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.7699999809265137,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320320273","display_name":"University of Cambridge","ror":"https://ror.org/013meh722"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W632419227","https://openalex.org/W1538406063","https://openalex.org/W1625255723","https://openalex.org/W1782590233","https://openalex.org/W1993636010","https://openalex.org/W2045458507","https://openalex.org/W2085261163","https://openalex.org/W2105950236","https://openalex.org/W2124054731","https://openalex.org/W2128060444","https://openalex.org/W2129884094","https://openalex.org/W2139434569","https://openalex.org/W2151103935","https://openalex.org/W2162915993","https://openalex.org/W2252355447","https://openalex.org/W2551151849","https://openalex.org/W2949673512","https://openalex.org/W3147438222","https://openalex.org/W4251553584"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2996910546","https://openalex.org/W1423973944","https://openalex.org/W2256003138","https://openalex.org/W2319259419","https://openalex.org/W4387405973","https://openalex.org/W4234580134","https://openalex.org/W2158203244","https://openalex.org/W2316814541","https://openalex.org/W2119640341"],"abstract_inverted_index":{"The":[0,24],"Cairo":[1,195],"Genizah":[2,196],"is":[3,41],"a":[4,73,151,178],"collection":[5],"of":[6,14,56,58,64,94,99,108,154],"handwritten":[7],"historical":[8],"documents":[9],"containing":[10],"approximately":[11],"350,000":[12],"fragments":[13,25,125],"mainly":[15],"Jewish":[16],"texts":[17],"discovered":[18,159],"in":[19,30,72],"the":[20,65,81,100,105,130,168,182,188,194],"late":[21],"19th":[22],"century.":[23],"are":[26],"today":[27],"spread":[28],"out":[29],"more":[31],"than":[32],"70":[33],"libraries":[34],"and":[35,39,47,97,116,148,186,190],"private":[36],"collections":[37],"worldwide,":[38],"there":[40],"an":[42],"ongoing":[43],"effort":[44,170],"to":[45,86,123,139,160,171],"document":[46],"catalog":[48,59,110],"all":[49,173],"extant":[50],"fragments.":[51,66],"We":[52],"explore":[53],"three":[54,174],"levels":[55,175],"extraction":[57],"data":[60],"from":[61,129],"digital":[62],"images":[63,68,82],"First,":[67],"should":[69],"be":[70,84,121,167],"captured":[71],"way":[74],"that":[75,126],"permits":[76],"standardized":[77],"automatic":[78,106],"processing.":[79],"Second,":[80],"can":[83],"processed":[85],"detect":[87],"elements":[88],"such":[89],"as":[90,140],"image":[91],"foreground,":[92],"regions":[93],"written":[95],"text":[96],"lines":[98],"text,":[101],"thereby":[102],"allowing":[103],"for":[104,193],"assignment":[107],"conventional":[109],"measurements.":[111],"Third,":[112],"modern":[113],"computer-vision":[114],"tools":[115],"statistical":[117],"inference":[118],"techniques":[119,189],"may":[120],"employed":[122],"identify":[124],"might":[127,166],"originate":[128],"same":[131],"original":[132],"codex.":[133],"Such":[134],"matched":[135],"fragments,":[136],"commonly":[137],"referred":[138],"\u201cjoins\u201d,":[141],"were":[142],"heretofore":[143],"identified":[144],"manually":[145],"by":[146],"experts,":[147],"presumably":[149],"only":[150],"small":[152],"fraction":[153],"existing":[155],"joins":[156],"have":[157],"been":[158],"date.":[161],"Overall,":[162],"we":[163],"present":[164],"what":[165],"first":[169],"address":[172],"successfully":[176],"within":[177],"large-scale":[179],"project,":[180],"detailing":[181],"various":[183],"design":[184],"choices":[185],"describing":[187],"algorithms":[191],"used":[192],"digitization":[197],"project.":[198]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
